[PATCH 1/1] Control Page Cache

From bc01c27e4ec3d97ea3e1b3b53bbe1938f6d20a34 Mon Sep 17 00:00:00 2001
From: Manish Sharma <manishrma@xxxxxxxxx>
Date: Thu, 4 Apr 2013 10:12:59 +0530
Subject: [PATCH 1/1] Control Page Cache

The idea is to control/watch the page cache pages with respect to
process and disk. The Control Page Cache patch provides the following
interfaces:

 1. Total pages in page cache per device:
        # cat /proc/pages_per_device
 2. Threads allocating pages per disk:
        # cat /proc/threads_per_device
 3. Total pages per device per thread:
        # cat /proc/<pid>/pages_per_device
 4. Set the maximum page limit for a device:
        # echo <pages> > /proc/max_limit/<device>
 5. Set the maximum page limit for a thread:
        # echo <pages> > /proc/<pid>/max_limit

ToDo:
 1. Currently the task that exceeds the limit is killed, but a proper
    way needs to be worked out; the task should not get killed, it
    might wait instead.
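
Example usage (illustrative only; the device name mmcblk0p1, the pid
1234 and the limit values are just placeholders). Note that in this
version the read handlers report through printk(), so their output
shows up in the kernel log rather than on stdout:

    # cat /proc/pages_per_device               (total cached pages per device)
    # cat /proc/threads_per_device             (per-thread accounting per device)
    # cat /proc/1234/pages_per_device          (pages of pid 1234 per device)
    # echo 25000 > /proc/max_limit/mmcblk0p1   (limit pages cached from mmcblk0p1)
    # echo 10000 > /proc/1234/max_limit        (limit pages cached by pid 1234)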


Signed-off-by: Manish Sharma <manishrma@xxxxxxxxx>
---
 block/genhd.c             | 1079 ++++++++++++++++++++++++++++++++++++++++++++-
 block/partition-generic.c |    8 +
 fs/proc/base.c            |  163 +++++++
 include/linux/genhd.h     |   38 +-
 include/linux/rmap.h      |    4 +
 include/linux/sched.h     |    5 +
 kernel/fork.c             |   24 +
 lib/Kconfig.debug         |   10 +
 mm/filemap.c              |   11 +
 mm/oom_kill.c             |   17 +
 mm/rmap.c                 |   97 +++-
 11 files changed, 1453 insertions(+), 3 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 7dcfdd8..0840de4 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -42,6 +42,11 @@ static void disk_add_events(struct gendisk *disk);
 static void disk_del_events(struct gendisk *disk);
 static void disk_release_events(struct gendisk *disk);

+/* Control Page Cache */
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+struct proc_dir_entry *proc_cpcfs;
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
+
 /**
  * disk_get_part - get partition
  * @disk: disk to look partition from
@@ -568,6 +573,1063 @@ exit:
    disk_part_iter_exit(&piter);
 }

+#ifdef CONFIG_CONTROL_PAGE_CACHE
+/**
+ * cpc_get_part_by_name - get partition by name
+ * @buf: name of partition
+ *
+ * It iterates over the partition and find out
+ * the requested partition and return the part
+ * structure.
+ *
+ * CONTEXT:
+ * RCU read locked.  The returned partition pointer is valid only
+ * while preemption is disabled.
+ *
+ * RETURNS:
+ * Found partition on success, NULL is returned if no partition matches
+ */
+struct hd_struct *cpc_get_part_by_name(char *buf)
+{
+    char hdname[BDEVNAME_SIZE];
+    struct class_dev_iter iter;
+    struct device *dev;
+
+    class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+    while ((dev = class_dev_iter_next(&iter))) {
+        struct gendisk *disk = dev_to_disk(dev);
+        struct disk_part_iter piter;
+        struct hd_struct *part;
+
+        /*
+         * Don't show empty devices or things that have been
+         * suppressed
+         */
+        if (get_capacity(disk) == 0 ||
+            (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
+            continue;
+
+        disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+        while ((part = disk_part_iter_next(&piter))) {
+            disk_name(disk, part->partno, hdname);
+            if (!strncmp(hdname, buf, strlen(buf))) {
+                disk_part_iter_exit(&piter);
+                class_dev_iter_exit(&iter);
+                return part;
+            }
+
+        }
+        disk_part_iter_exit(&piter);
+    }
+    class_dev_iter_exit(&iter);
+    return NULL;
+}
+
+/**
+ * proc_cpc_part_read - proc read for max limit of partition
+ * @file: file pointer
+ * @buf: user buffer pointer
+ * @len: length
+ * @ppos: position offset
+ *
+ * It gets the partition and prints the maximum limit of pages
+ *
+ * RETURNS:
+ * Success
+ */
+
+static ssize_t proc_cpc_part_read(struct file *file, char __user *buf,
+                                            size_t len, loff_t *ppos)
+{
+    struct hd_struct *part = NULL;
+
+    part = cpc_get_part_by_name(file->f_path.dentry->d_iname);
+    if (NULL == part)
+        printk(KERN_EMERG"No part\n");
+    else
+        printk(KERN_EMERG"%ld\n", part->max_limit);
+    return 0;
+
+}
+
+/**
+ * proc_cpc_part_write - proc write for max limit of partition
+ * @file: file pointer
+ * @buf: user buffer pointer
+ * @count: count
+ * @ppos: position offset
+ *
+ * It gets the partition and updates the maximum limit of pages
+ *
+ * RETURNS:
+ * count on Success; Error on Failure
+ */
+static ssize_t proc_cpc_part_write(struct file *file, const char __user *buf,
+                                            size_t count, loff_t *ppos)
+{
+    struct hd_struct *part = NULL;
+    char buffer[CPC_MAX_BUFFER] = {0};
+    long max_limit = -1;
+    int err = 0;
+
+    if (count > CPC_MAX_BUFFER) {
+        printk(KERN_EMERG"ERROR:Enter only int value %zu ::\n", count);
+        return -EINVAL;
+    }
+    if ((NULL == file) || (NULL == file->f_path.dentry))
+        return -EINVAL;
+
+    part = cpc_get_part_by_name(file->f_path.dentry->d_iname);
+    if (NULL == part) {
+        printk(KERN_EMERG"No part\n");
+        return -EINVAL;
+    } else {
+        if (copy_from_user(buffer, buf, count)) {
+            printk(KERN_EMERG"ERROR:%d\n", count);
+            return -EFAULT;
+        }
+
+        err = strict_strtol(strstrip(buffer), 0, &max_limit);
+        if (err) {
+            printk(KERN_EMERG"ERROR:err %d \n", err);
+            return -EINVAL;
+        }
+        if (max_limit >= CPC_MAX_LIMIT) { /* 2GB */
+            /* Later we will make it upto the RAM value */
+            printk(KERN_EMERG"Entered value >= 2GB\n");
+            return -EINVAL;
+        } else {
+            part->max_limit = max_limit;
+            printk(KERN_EMERG"Entered value %ld\n" , part->max_limit);
+        }
+    }
+    return count;
+}
+
+static const struct file_operations proc_cpc_part_ops = {
+    .read        = proc_cpc_part_read,
+    .write        = proc_cpc_part_write,
+};
+
+/**
+ * cpc_hash
+ *        Returns the hashed key for the pid
+ * @pid: process/thread pid
+ *
+ * It returns the hashed key of the pid.
+ *
+ * RETURNS:
+ * hashed value with CPC_MAX_PID_PER_PART
+ */
+int cpc_hash(int pid)
+{
+    return pid % CPC_MAX_PID_PER_PART;
+}
+
+/**
+ * cpc_invoke_oom
+ *        Invoke OOM on the current task
+ * @void:
+ *
+ * It invoke OOM on the current task.
+ *
+ * RETURNS:
+ *
+ */
+void cpc_invoke_oom(void)
+{
+    printk(KERN_EMERG"Invoking OOM on %d\n", current->pid);
+    if (cpc_oom_kill_task(current))
+        printk(KERN_EMERG"OOM failed on task\n");
+
+}
+
+/**
+ * cpc_part_limit
+ *        check the limit on partition
+ * @partition: Partition whose limit to be checked
+ *
+ * It checks the limit of the partition if the limit
+ * is reached which is set as per the user compared with
+ * the total pages fetched from the device.
+ *
+ * CONTEXT:
+ * read or write locked.
+ *
+ * RETURNS:
+ *
+ */
+int cpc_part_limit(struct hd_struct *part)
+{
+    int ret = 0;
+    if (NULL != part) {
+        /*read_lock_irq(&part->cpc_lock);*/
+        if ((part->max_limit <= part->total_pages) && \
+            (-1 != part->max_limit)) {
+            ret = 1;
+        }
+        /*read_unlock_irq(&part->cpc_lock); */
+    }
+    return ret;
+}
+
+/**
+ * cpc_get_part
+ *        Get the partition from the mapping
+ * @mapping: Mapping of the file mapped page w.r.t disk.
+ *
+ * It checks the inode and get the parition from the block
+ * device.
+ * either the file is access from the partition or the
+ * raw disk is accessed both the cases are handled here
+ * if there is any other case we will observe the dump.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *
+ */
+struct hd_struct *cpc_get_part(struct address_space *mapping)
+{
+    struct inode *inode = mapping->host;
+    struct hd_struct *hd = NULL;
+    struct block_device *bdev = NULL;
+
+    if (NULL != inode) {
+        if ((NULL != inode->i_sb) && (NULL != inode->i_sb->s_bdev)) {
+            bdev = inode->i_sb->s_bdev;
+            if (bdev->bd_part)
+                hd = bdev->bd_part;
+            return hd;
+        } else {
+            /* Case where the super block is not yet filled in, so we
+               need to go the other way round.
+               Can't use this path every time because the inode is not
+               always a device inode; here we always end up working
+               on the zeroth partition.
+            */
+            if (NULL != inode->i_bdev) {
+                if (inode->i_bdev->bd_part) {
+                    hd = inode->i_bdev->bd_part;
+                    return hd;
+                }
+            }
+        }
+    }
+    /* Should never get here */
+    /* dump_stack();*/
+    printk(KERN_EMERG"ERROR:Should never get here ::No Device Found\n");
+    return NULL;
+}
+
+/**
+ * cpc_check_limit
+ *        checks the limit of the tsk and the partition
+ * @tsk: task structure.
+ * @hd: partition structure.
+ *
+ * It checks the limit of the parition & the tsk
+ * w.r.t the total pages fetched and the max limit set.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ * 0: Failure(When limit is crossed) and
+ * 1: Success(When limit is not crossed)
+ *
+ */
+int cpc_check_limit(struct task_struct *tsk, struct hd_struct *hd)
+{
+    struct task_struct *task = NULL;
+    if (!tsk || !hd) {
+        printk(KERN_EMERG"NO tsk or partition [%s]\n", __FUNCTION__);
+        /* if we return 0 here it will invoke OOM */
+        return 1;
+    }
+
+    if (!cpc_part_limit(hd)) {
+        if (tsk->pid == tsk->tgid) {
+            if ((tsk->total_pages < tsk->max_limit) || (-1 == tsk->max_limit)) {
+                return 1;
+            }
+        } else {
+            task = tsk->group_leader;
+            if (task) {
+                if (pid_alive(task)) {
+                    get_task_struct(task);
+                    if ((task->total_pages < task->max_limit) || (-1 == task->max_limit)) {
+                        put_task_struct(task);
+                        return 1;
+                    }
+                    put_task_struct(task);
+                }
+            } else
+                printk(KERN_EMERG"#### NO PARENT TASK\n");
+        }
+    }
+    return 0;
+}
+
+/**
+ * cpc_find_tgid
+ *        Finds the tgid of the process in the current partition
+ * @tgid: task group leader id.
+ * @part: partition structure.
+ *
+ * It iterates over the list and checks if any cpc structure
+ * i.e entry exists for the pid(thread/process) with passed tgid
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ * cpc structure of the process with tgid on Success
+ * NULL if didnt find the CPC (Control Page Cache)
+ *
+ */
+
+struct cpc_struct *cpc_find_tgid(int tgid, struct hd_struct *part)
+{
+    struct hlist_node *list;
+    struct cpc_struct *cpc = NULL;
+    struct hlist_head *pid_tbl = NULL;
+
+    /* read_lock_irq(&part->cpc_lock); */
+    pid_tbl = part->pid_tbl;
+
+    if (tgid <= 0 || (NULL == pid_tbl))
+        return NULL;
+
+    hlist_for_each_entry_rcu(cpc, list,
+            &pid_tbl[cpc_hash(tgid)], pid_list) {
+        if (cpc) {
+            if ((cpc->tgid == tgid) && (cpc->pid != cpc->tgid)) {
+                /* read_unlock_irq(&part->cpc_lock); */
+                return cpc;
+            }
+        }
+    }
+
+    /*read_unlock_irq(&part->cpc_lock);*/
+    return NULL;
+}
+/**
+ * cpc_find_pid
+ *        Finds the pid of the process in the current partition
+ * @tgid: task group leader id.
+ * @part: partition structure.
+ *
+ * It iterates over the list in the partition and checks if
+ * any cpc structure i.e entry exists for the pid(thread/process)
+ * with passed pid
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ * cpc structure of the process with pid on Success
+ * NULL if didnt find the CPC (Control Page Cache)
+ *
+ */
+struct cpc_struct *cpc_find_pid(int pid, struct hd_struct *part)
+{
+    struct hlist_node *list = NULL;
+    struct cpc_struct *cpc = NULL;
+    struct hlist_head *pid_tbl = NULL;
+    int p = cpc_hash(pid);
+
+    /*read_lock_irq(&part->cpc_lock);*/
+
+    pid_tbl = part->pid_tbl;
+    if (pid <= 0 || (NULL == pid_tbl))
+        return NULL;
+
+    hlist_for_each_entry_rcu(cpc, list,
+            &pid_tbl[p], pid_list){
+        if (cpc)
+            if (cpc->pid == pid) {
+                /*read_unlock_irq(&part->cpc_lock);*/
+                return cpc;
+            }
+    }
+
+    /*read_unlock_irq(&part->cpc_lock);*/
+    return NULL;
+}
+
+/**
+ * cpc_add_to_thread
+ *        Account the pages for the task w.r.t the partition
+ * @mapping: file mapped page mapping.
+ * @tsk: task structure.
+ *
+ * This function gets the partition and finds out whether the current
+ * task already has a cpc entry for it.
+ * If not, it creates the entry; otherwise it increments the
+ * accounted pages.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  -1: if failure
+ *  0 : if Success
+ *
+ */
+int cpc_add_to_thread(struct address_space *mapping, struct task_struct *tsk)
+{
+    struct cpc_struct *cpc = NULL;
+    struct hlist_head *pid_tbl = NULL;
+    struct hd_struct *part = cpc_get_part(mapping);
+    int ret = 0;
+    int pid, tgid;
+
+    if (!part || !tsk) {
+        printk(KERN_EMERG"Error improper value [%s]\n", __FUNCTION__);
+        return -1;
+    }
+
+    write_lock_irq(&part->cpc_lock);
+    if (!cpc_check_limit(tsk, part)) {
+        write_unlock_irq(&part->cpc_lock);
+        cpc_oom_kill_task(tsk);
+        return -1;
+    }
+
+    pid = tsk->pid;
+    tgid = tsk->tgid;
+
+    pid_tbl = part->pid_tbl;
+    if (!pid_tbl) {
+        write_unlock_irq(&part->cpc_lock);
+        printk(KERN_EMERG"Error improper value [%s]\n", __FUNCTION__);
+        return -1;
+    }
+    /* find the current pid entry  */
+    cpc = cpc_find_pid(pid, part);
+    if (NULL == cpc) {
+        cpc = kzalloc(sizeof(struct cpc_struct), GFP_ATOMIC); /* cpc_lock held with IRQs off */
+        if (NULL == cpc) {
+            write_unlock_irq(&part->cpc_lock);
+            return -ENOMEM;
+        }
+        cpc->pid = pid;
+        cpc->tgid = tgid;
+        cpc->actual_pages = 0;
+        hlist_add_head_rcu(&cpc->pid_list, &pid_tbl[cpc_hash(cpc->pid)]);
+    }
+
+    cpc->actual_pages++;
+    write_unlock_irq(&part->cpc_lock);
+
+#ifdef CPC_DBG_2
+    printk(KERN_EMERG"[%s] %d::%d(%d)\n", __FUNCTION__, part->partno, pid, cpc->actual_pages);
+#endif
+    return ret;
+}
+
+/**
+ * cpc_del_from_thread
+ *        Remove the pages for the task w.r.t the partition
+ * @mapping: file mapped page mapping.
+ * @tsk: task structure.
+ *
+ * This function gets the partition and finds out whether the current
+ * task already has a cpc entry for it (falling back to the group
+ * leader's entry for threads) and decrements the accounted pages.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  -1: if failure
+ *  0 : if Success
+ *
+ */
+int cpc_del_from_thread(struct address_space *mapping, struct task_struct *tsk)
+{
+    struct cpc_struct *cpc = NULL;
+    struct hlist_head *pid_tbl = NULL;
+    struct hd_struct *part = NULL;
+    int pid, tgid;
+
+    if (!mapping || !tsk) {
+        return -1;
+    }
+
+    part = cpc_get_part(mapping);
+    if (!part) {
+        /*printk(KERN_EMERG"[%s]Error improper value \n",__FUNCTION__);*/
+        return -1;
+    }
+
+    write_lock_irq(&part->cpc_lock);
+    pid = tsk->pid;
+    tgid = tsk->tgid;
+
+    pid_tbl = part->pid_tbl;
+    if (!pid_tbl) {
+        /*printk(KERN_EMERG"[%s]Error improper value \n",__FUNCTION__); */
+        write_unlock_irq(&part->cpc_lock);
+        return -1;
+    }
+
+    /* find if the current pid is available */
+    cpc = cpc_find_pid(pid, part);
+    if (NULL == cpc) {
+        if (pid != tgid) {
+            /* If it is a thread, check whether its group
+             * leader pid is available in the partition;
+             * it can be that the thread is removing the
+             * pages */
+            cpc = cpc_find_pid(tgid, part);
+            if (NULL == cpc) {
+                /* No group leader entry. Still look for any
+                 * other thread of the same group leader that
+                 * holds pages from this device.
+                 */
+                cpc = cpc_find_tgid(tgid, part);
+                if (NULL == cpc) {
+                    /* Still no CPC - a weird case, then who owned this?
+                     * We should not come here, OR we might hold the
+                     * page via dup accounting for this partition and
+                     * are freeing it now.
+                     */
+                    goto unlock;
+                }
+            }
+        } else {
+            if (tsk->dup) {
+                tsk->dup--;
+                tsk->total_pages--;
+                goto unlock;
+            }
+            /* We are the process and we don't have dup
+             * pages, so search this partition's list for pages
+             * held by our threads (entries whose tgid equals
+             * our pid). This is just accounting, so be careful:
+             * all threads share the same mm.
+             */
+            cpc = cpc_find_tgid(pid, part);
+            if (NULL == cpc) {
+                /* Didn't find anything, so
+                 * we can't do much except bail out.
+                 */
+                goto unlock;
+            }
+        }
+    }
+    cpc->actual_pages--;
+    if (!cpc->actual_pages) {
+        hlist_del_rcu(&cpc->pid_list);
+        kfree(cpc);
+    }
+unlock:
+    write_unlock_irq(&part->cpc_lock);
+
+#ifdef CPC_DBG_2
+    printk(KERN_EMERG"[%s] %d::%d(%d)\n", __FUNCTION__, part->partno,\
+            pid, cpc->actual_pages);
+#endif
+
+    return 0;
+}
+
+/**
+ * cpc_shift_acct
+ *        Shift the pages from the thread to its respective
+ *        process when the thread is getting killed.
+ * @tsk: task structure.
+ *
+ * This function iterates over the partitions and adds
+ * the thread's pages to its group leader's entry in the
+ * respective list of each device partition.
+ * Removal is taken care of by cpc_del_partition.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  void:
+ *
+ */
+void cpc_shift_acct(struct task_struct *tsk)
+{
+    struct class_dev_iter iter;
+    struct disk_part_iter piter;
+    struct device *dev;
+    struct cpc_struct *cpc = NULL, *cpc_p = NULL;
+    struct hlist_head *pid_tbl = NULL;
+    int pid = 0;
+
+    if (tsk->pid == tsk->tgid) {
+        printk(KERN_EMERG"ERROR:Shifting req for Parent\n");
+        return ;
+    }
+    class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+    while ((dev = class_dev_iter_next(&iter))) {
+        struct gendisk *disk = dev_to_disk(dev);
+        struct hd_struct *part;
+        /*
+         * Don't show empty devices or things that have been
+         * suppressed
+         */
+        if (get_capacity(disk) == 0 ||
+            (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
+            continue;
+
+        disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+        while ((part = disk_part_iter_next(&piter))) {
+            write_lock_irq(&part->cpc_lock);
+            pid_tbl = part->pid_tbl;
+            if (!pid_tbl) {
+                printk(KERN_EMERG"#####Error improper value [%s]\n", __FUNCTION__);
+                write_unlock_irq(&part->cpc_lock);
+                break;
+            }
+            cpc = cpc_find_pid(tsk->pid, part);
+            if (cpc) {
+                pid = tsk->tgid;
+                cpc_p = cpc_find_pid(pid, part);
+                if (NULL == cpc_p) {
+                    write_unlock_irq(&part->cpc_lock);
+                    continue;
+                }
+                cpc_p->actual_pages += cpc->actual_pages;
+                printk(KERN_EMERG"Shifting %d ::%d(%d) to %d(%d) \n", part->partno, tsk->pid,\
+                        cpc->actual_pages, pid, cpc_p->actual_pages);
+                if (!cpc_p->actual_pages) {
+                    hlist_del_rcu(&cpc_p->pid_list);
+                    kfree(cpc_p);
+                    cpc_p = NULL;
+                }
+                hlist_del_rcu(&cpc->pid_list);
+                kfree(cpc);
+                cpc = NULL;
+            }
+            write_unlock_irq(&part->cpc_lock);
+        }
+        disk_part_iter_exit(&piter);
+    }
+    class_dev_iter_exit(&iter);
+}
+
+/**
+ * cpc_inc
+ *        Increment the total pages fetched from the
+ *        respective partition in the page cache
+ * @mapping: File mapped page mapping of device.
+ *
+ * This function gets the partition from the mapping
+ * checks if the limit is there and then increment the
+ * total pages of the partition.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  void:
+ *
+ */
+void cpc_inc(struct address_space *mapping)
+{
+    struct hd_struct *hd;
+    hd = cpc_get_part(mapping);
+    if (NULL != hd) {
+        read_lock_irq(&hd->cpc_lock);
+        if (cpc_part_limit(hd)) {
+            read_unlock_irq(&hd->cpc_lock);
+            cpc_invoke_oom();
+        }
+        read_unlock_irq(&hd->cpc_lock);
+        hd->total_pages += 1;
+    } else
+        printk(KERN_EMERG"##[%s]## No partition\n", __FUNCTION__);
+}
+/**
+ * cpc_dec
+ *        Decrement the total pages freed from the
+ *        respective partition from the page cache
+ * @mapping: File mapped page mapping of device.
+ *
+ * This function gets the partition from the mapping
+ * and then decrement the total pages of the partition.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  void:
+ *
+ */
+void cpc_dec(struct address_space *mapping)
+{
+    struct hd_struct *hd;
+    hd = cpc_get_part(mapping);
+    if (NULL != hd) {
+        hd->total_pages -= 1;
+    } else
+        printk(KERN_EMERG"##[%s]## No partition\n", __FUNCTION__);
+
+}
+/**
+ * del_part_htbl
+ *        This function is called before deleting the partition,
+ *        so it cleans up whatever remains: it removes everything
+ *        and exits.
+ * @part: Partition to delete.
+ *
+ * This function removes all the cpc entries and frees the
+ * memory allocated for the hash tables etc.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  -1: Failure
+ *   0: Success
+ *
+ */
+int del_part_htbl(struct hd_struct *part)
+{
+    int htbl = 0;
+    char hdname[BDEVNAME_SIZE];
+    struct gendisk *disk;
+    struct hlist_head *pid_tbl = NULL;
+    struct hlist_node *list = NULL;
+    struct cpc_struct *cpc = NULL;
+
+    if (!part) {
+        printk(KERN_EMERG" Deleting part No part\n");
+        dump_stack();
+        return -1;
+    }
+
+    write_lock_irq(&part->cpc_lock);
+    disk = part_to_disk(part);
+    part->total_pages = 0;
+    part->max_limit = -1;
+
+
+    if (part->pid_tbl) {
+        pid_tbl = part->pid_tbl;
+        for (htbl = 0; htbl < CPC_MAX_PID_PER_PART; htbl++) {
+            hlist_for_each_entry_rcu(cpc, list,
+                &pid_tbl[htbl], pid_list){
+                if (cpc) {
+                    hlist_del_rcu(&cpc->pid_list);
+                    kfree(cpc);
+                    cpc = NULL;
+                }
+            }
+        }
+        kfree(part->pid_tbl);
+        part->pid_tbl = NULL;
+    }
+
+    disk_name(disk, part->partno, hdname);
+    if (NULL != proc_cpcfs) {
+        remove_proc_entry(hdname, proc_cpcfs);
+    }
+    write_unlock_irq(&part->cpc_lock);
+
+    return 0;
+}
+
+/**
+ * init_part_htbl
+ *        This function is called after creating the partition
+ *        so that we can create the hash table entries and
+ *        initialize the structures.
+ * @part: Partition to init.
+ *
+ * This function initializes all the cpc entries and
+ * creates the hash table entries in the partition.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  void:
+ *
+ */
+void init_part_htbl(struct hd_struct *part)
+{
+    int htbl = 0;
+    char hdname[BDEVNAME_SIZE];
+    struct gendisk *disk;
+
+    if (!part) {
+        printk(KERN_EMERG"Will not come here\n");
+        dump_stack();
+        return;
+    }
+
+    disk = part_to_disk(part);
+    part->total_pages = 0;
+    part->max_limit = -1;
+    rwlock_init(&part->cpc_lock);
+    part->pid_tbl = kmalloc(CPC_MAX_PID_PER_PART * sizeof(*(part->pid_tbl)), GFP_KERNEL);
+    if (!part->pid_tbl) {
+        printk(KERN_EMERG"[%s]ERROR: unable to allocate mem!\n", __FUNCTION__);
+        dump_stack();
+        return;
+    }
+    for (htbl = 0; htbl < CPC_MAX_PID_PER_PART; htbl++)
+        INIT_HLIST_HEAD(&part->pid_tbl[htbl]);
+    disk_name(disk, part->partno, hdname);
+    if (proc_cpcfs == NULL) {
+        printk(KERN_EMERG"### unable to create proc entry [%s]\n", hdname);
+        return;
+    }
+    proc_create(hdname, 0, proc_cpcfs, &proc_cpc_part_ops);
+
+}
+/**
+ * init_disk_htbl
+ *        This function initializes the disk,
+ *        i.e. iterates over all the partitions and
+ *        initializes the hash tables and cpc structures.
+ * @disk: gendisk structure.
+ *
+ * This function initializes all the disk partitions
+ * with cpc entries and creates the hash table entries
+ * in every partition.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  void:
+ *
+ */
+void init_disk_htbl(struct gendisk *disk)
+{
+
+    struct disk_part_iter piter;
+    struct hd_struct *part;
+
+    disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+    while ((part = disk_part_iter_next(&piter))) {
+        if (part == &disk->part0)
+            continue;
+        init_part_htbl(part);
+    }
+    disk_part_iter_exit(&piter);
+
+}
+/**
+ * cpc_del_part
+ *        Exported CPC delete partition function.
+ * @part: Partition struct.
+ *
+ * The partition is already invalidated at this point, so there is no issue.
+ * This function deletes the partition entries and
+ * removes the cpc and hash table entries.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  void:
+ *
+ */
+void cpc_del_part(struct hd_struct *part)
+{
+    del_part_htbl(part);
+}
+
+/**
+ * cpc_init_part
+ *        Exported CPC init partition function.
+ * @part: Partition struct.
+ *
+ * The partition is already created at this point.
+ * This function initializes the partition entries.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  void:
+ *
+ */
+void cpc_init_part(struct hd_struct *part)
+{
+    init_part_htbl(part);
+}
+
+/**
+ * cpc_pages_per_device
+ *        Displays the proc entries in the different case
+ *        1. /proc/pages_per_device
+ *        2. /proc/threads_per_device
+ *        3. /proc/<pid>/pages_per_device
+ * @pid: 0 for per-device totals, -1 for all pids, otherwise the pid to display.
+ *
+ * This function iterates over the partitions and prints
+ * the details depending upon the passed value.
+ * It displays the structures and the accounted pages for
+ * each process/thread w.r.t. each device.
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  void:
+ *
+ */
+void cpc_pages_per_device(int pid)
+{
+    struct class_dev_iter iter;
+    struct device *dev;
+    unsigned int actual_pages = 0;
+
+    if (0 == pid)
+        printk("Device        Total pages\n");
+    else if (-1 == pid)
+        printk("Device        Pid[Pages:Tgid,Pages] \n");
+    else
+        printk("Device        Pages \n");
+
+    class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
+    while ((dev = class_dev_iter_next(&iter))) {
+        struct gendisk *disk = dev_to_disk(dev);
+        struct disk_part_iter piter;
+        struct hd_struct *part;
+        char buf[BDEVT_SIZE];
+        struct cpc_struct *cpc = NULL;
+        struct hlist_head *pid_tbl = NULL;
+        int cnt = 0;
+        int total_pages = 0;
+        struct task_struct *tsk = NULL;
+
+        /*
+         * Don't show empty devices or things that have been
+         * suppressed
+         */
+        if (get_capacity(disk) == 0 ||
+            (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO))
+            continue;
+
+        /*
+         * Note, unlike /proc/partitions, I am showing the
+         * numbers in hex - the same format as the root=
+         * option takes.
+         */
+        disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+        while ((part = disk_part_iter_next(&piter))) {
+            printk(" %s\t", disk_name(disk, part->partno, buf));
+            read_lock_irq(&part->cpc_lock);
+            pid_tbl = part->pid_tbl;
+
+            if (0 == pid) {
+                /* total pages for partition*/
+                printk("%u ", part->total_pages);
+            } else if (-1 == pid) {
+                /* all pids for partitions*/
+                struct hlist_node *pid_list;
+
+                if (NULL == pid_tbl)
+                        printk("0 ");
+                else{
+                    for (cnt = 0; cnt < CPC_MAX_PID_PER_PART; cnt++) {
+                        hlist_for_each_entry(cpc, pid_list, &pid_tbl[cnt], pid_list) {
+                            tsk = get_pid_task(find_get_pid(cpc->tgid), PIDTYPE_PID);
+                            if (tsk) {
+                                /*if (pid_alive(tsk)) {*/
+                                    total_pages = tsk->total_pages;
+                                    put_task_struct(tsk);
+                                /*}*/
+                            } else {
+                                /*case where the thread/process has gone print 0 in this case*/
+                                /*printk(KERN_EMERG"No parent %d:%d \n",cpc->pid, cpc->tgid);*/
+                            }
+                            /*printk("%d[%d:%d,%d] ",cpc->pid,cpc->actual_pages,cpc->tgid,total_pages);*/
+                            printk("%d[%u:%d,%u] ", cpc->pid, cpc->actual_pages, cpc->tgid, total_pages);
+                            total_pages = 0;
+                        }
+                    }
+                    printk("0 ");
+                }
+            } else {
+                /* search and display only given pid for partition*/
+                if (NULL == pid_tbl)
+                        printk("0 ");
+                else {
+                    cpc = cpc_find_pid(pid, part);
+                    if (NULL != cpc)
+                        printk("%u ", cpc->actual_pages);
+                    else
+                        printk("%u ", actual_pages);
+                    }
+            }
+            read_unlock_irq(&part->cpc_lock);
+            printk("\n");
+        }
+        disk_part_iter_exit(&piter);
+    }
+    class_dev_iter_exit(&iter);
+}
+/**
+ * proc_cpc_ppp_read
+ *        proc entry for reading threads per device
+ * @file: file pointer
+ * @buf: user buffer pointer
+ * @count: count
+ * @ppos: position offset
+ *
+ * calls pages_per_device
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  size_t: count
+ *
+ */
+static ssize_t proc_cpc_ppp_read(struct file *file, char __user *buf,
+                                                size_t len, loff_t *ppos)
+{
+    int pid = -1;
+    cpc_pages_per_device(pid);
+    return 0;
+
+}
+
+/**
+ * proc_cpc_ppd_read
+ *        proc entry for reading pages per device
+ * @file: file pointer
+ * @buf: user buffer pointer
+ * @count: count
+ * @ppos: position offset
+ *
+ * calls pages_per_device
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  size_t: count
+ *
+ */
+static ssize_t proc_cpc_ppd_read(struct file *file, char __user *buf,
+                                                size_t len, loff_t *ppos)
+{
+    int pid = 0;
+    cpc_pages_per_device(pid);
+    return 0;
+
+}
+
+static const struct file_operations proc_cpc_ppd_ops = {
+    .read        = proc_cpc_ppd_read,
+};
+
+static const struct file_operations proc_cpc_ppp_ops = {
+    .read        = proc_cpc_ppp_read,
+};
+
+/* EXPORTED CPC FUNCTIONS*/
+EXPORT_SYMBOL(cpc_inc);
+EXPORT_SYMBOL(cpc_dec);
+EXPORT_SYMBOL(cpc_add_to_thread);
+EXPORT_SYMBOL(cpc_del_from_thread);
+EXPORT_SYMBOL(cpc_del_part);
+EXPORT_SYMBOL(cpc_init_part);
+EXPORT_SYMBOL(cpc_shift_acct);
+EXPORT_SYMBOL(cpc_pages_per_device);
+
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
 /**
  * add_disk - add partitioning information to kernel list
  * @disk: per-device partitioning information
@@ -613,6 +1675,12 @@ void add_disk(struct gendisk *disk)

    blk_register_region(disk_devt(disk), disk->minors, NULL,
                exact_match, exact_lock, disk);
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+    /* part0 is a special case when there is no partition table;
+     * no page is fetched from the device before this point.
+     */
+    cpc_init_part(&disk->part0);
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
    register_disk(disk);
    blk_register_queue(disk);

@@ -645,7 +1713,9 @@ void del_gendisk(struct gendisk *disk)
        delete_partition(disk, part->partno);
    }
    disk_part_iter_exit(&piter);
-
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+    cpc_del_part(&disk->part0);
+#endif /*CONFIG_CONTROL_PAGE_CACHE */
    invalidate_partition(disk, 0);
    set_capacity(disk, 0);
    disk->flags &= ~GENHD_FL_UP;
@@ -1198,6 +2268,13 @@ static const struct file_operations proc_diskstats_operations = {

 static int __init proc_genhd_init(void)
 {
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+    proc_cpcfs = proc_mkdir("max_limit", NULL);
+    if (proc_cpcfs == NULL)
+        return -1;
+    proc_create("pages_per_device", 0, NULL, &proc_cpc_ppd_ops);
+    proc_create("threads_per_device", 0, NULL, &proc_cpc_ppp_ops);
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
    proc_create("diskstats", 0, NULL, &proc_diskstats_operations);
    proc_create("partitions", 0, NULL, &proc_partitions_operations);
    return 0;
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 1cb4dec..c1b50bc 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -249,6 +249,10 @@ void delete_partition(struct gendisk *disk, int partno)
    if (!part)
        return;

+#ifdef CONFIG_CONTROL_PAGE_CACHE
+    cpc_del_part(part);
+#endif /* CONFIG_CONTROL_PAGE_CACHE*/
+
    rcu_assign_pointer(ptbl->part[partno], NULL);
    rcu_assign_pointer(ptbl->last_lookup, NULL);
    kobject_put(part->holder_dir);
@@ -356,6 +360,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
    if (!dev_get_uevent_suppress(ddev))
        kobject_uevent(&pdev->kobj, KOBJ_ADD);

+#ifdef CONFIG_CONTROL_PAGE_CACHE
+    cpc_init_part(p);
+#endif /*CONFIG_CONTROL_PAGE_CACHE */
+
    hd_ref_init(p);
    return p;

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9b43ff7..d19fe5e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -92,6 +92,8 @@
 #include "internal.h"
 #include "fd.h"

+#include <linux/genhd.h>
+#include <linux/backing-dev.h>
 /* NOTE:
  *    Implementing inode permission operations in /proc is almost
  *    certainly an error.  Permission checks need to happen during
@@ -2486,6 +2488,163 @@ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
    return err;
 }

+#ifdef CONFIG_CONTROL_PAGE_CACHE
+/**
+ * proc_cpc_tsk_ppd_read
+ *        Displays the pages per device w.r.t task
+ * @file: file pointer
+ * @buf: user buffer pointer
+ * @count: count
+ * @ppos: position offset
+ *
+ * This function prints the details of the task's accounted pages
+ * w.r.t. the device/partition; it calls cpc_pages_per_device().
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  size_t: count
+ *
+ */
+static ssize_t proc_cpc_tsk_ppd_read(struct file *file, char __user *buf,
+                                                        size_t len, loff_t *ppos)
+{
+    struct inode *inode = file->f_path.dentry->d_inode;
+    struct task_struct *tsk = get_proc_task(inode);
+    if (!tsk)
+        return -ESRCH;
+    printk(KERN_EMERG"Task        \t :%s\n", tsk->comm);
+    printk(KERN_EMERG"Pid         \t :%d\n", tsk->pid);
+    printk(KERN_EMERG"Tgid        \t :%d\n", tsk->tgid);
+    if (tsk->pid == tsk->tgid) {
+        printk(KERN_EMERG"total pages \t :%u\n", tsk->total_pages);
+        printk(KERN_EMERG"OOM limit   \t :%ld\n", tsk->max_limit);
+    } else {
+        if (tsk->group_leader) {
+            if (!pid_alive(tsk->group_leader))
+                printk(KERN_EMERG"#### NO PARENT TASK\n");
+            get_task_struct(tsk->group_leader);
+            printk(KERN_EMERG"Ppid        \t :%u \n", tsk->group_leader->pid);
+            printk(KERN_EMERG"total pages \t :%u \n", tsk->group_leader->total_pages);
+            printk(KERN_EMERG"OOM limit   \t :%ld \n", tsk->group_leader->max_limit);
+            put_task_struct(tsk->group_leader);
+        }
+    }
+    cpc_pages_per_device(tsk->pid);
+    return 0;
+
+}
+
+/**
+ * proc_cpc_tsk_limit_write
+ *        Update the task Max limit
+ * @file: file pointer
+ * @buf: user buffer pointer
+ * @count: count
+ * @ppos: position offset
+ *
+ * This function updates the max limit of pages fetched
+ * for the process
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  size_t: count
+ *
+ */
+static ssize_t proc_cpc_tsk_limit_write(struct file *file, const char __user *buf,
+                                                            size_t count, loff_t *offs)
+{
+    struct inode *inode = file->f_path.dentry->d_inode;
+    struct task_struct *task = get_proc_task(inode);
+    long max_limit = 0;
+    char buffer[CPC_MAX_BUFFER] = {0};
+    int err = 0;
+
+    if (!task)
+        return -ESRCH;
+
+    if (count > CPC_MAX_BUFFER) {
+        printk(KERN_EMERG"ERROR:Enter value less than 2GB %zu ::\n", count);
+        return -EINVAL;
+    }
+
+    if (copy_from_user(buffer, buf, count)) {
+        printk(KERN_EMERG"ERROR:%zu\n", count);
+        return -EFAULT;
+    }
+
+    err = strict_strtol(strstrip(buffer), 0, &max_limit);
+    if (err) {
+        printk(KERN_EMERG"ERROR:err %d\n", err);
+        return -EINVAL;
+    }
+    printk(KERN_EMERG"user buf %ld count %zu\n", max_limit, count);
+
+    if (max_limit >= CPC_MAX_LIMIT) { /* 2GB*/
+        /* Later we will make it upto the RAM value */
+        printk(KERN_EMERG"Entered value > 2GB\n");
+        return -EINVAL;
+    } else {
+        if (task->pid == task->tgid) {
+            task->max_limit = max_limit;
+        } else {
+            if (task->group_leader) {
+                if (!pid_alive(task->group_leader))
+                    printk(KERN_EMERG"#### NO PARENT TASK\n");
+                get_task_struct(task->group_leader);
+                task->group_leader->max_limit = max_limit;
+                put_task_struct(task->group_leader);
+                printk(KERN_EMERG"Entered value %ld\n", task->parent->max_limit);
+            }
+        }
+    }
+    return count;
+}
+/**
+ * proc_cpc_tsk_limit_read
+ *        Reads the task Max limit
+ * @file: file pointer
+ * @buf: user buffer pointer
+ * @count: count
+ * @ppos: position offset
+ *
+ * This function reads the max limit of pages fetched
+ * for the process
+ *
+ * CONTEXT:
+ *
+ * RETURNS:
+ *  size_t: count
+ *
+ */
+static ssize_t proc_cpc_tsk_limit_read(struct file *file, char __user *buf,
+                                                        size_t len, loff_t *ppos)
+{
+    struct inode *inode = file->f_path.dentry->d_inode;
+    struct task_struct *task = get_proc_task(inode);
+    if (!task)
+        return -ESRCH;
+    if (task->pid == task->tgid)
+        printk(KERN_EMERG"%ld\n", task->max_limit);
+    else {
+        if (task->group_leader) {
+            printk(KERN_EMERG"%ld\n", task->group_leader->max_limit);
+        }
+    }
+    return 0;
+}
+
+static const struct file_operations proc_cpc_tsk_ppd_ops = {
+    .read        = proc_cpc_tsk_ppd_read,
+};
+static const struct file_operations proc_cpc_tsk_limit_ops = {
+    .read        = proc_cpc_tsk_limit_read,
+    .write        = proc_cpc_tsk_limit_write,
+};
+
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
+
 /*
  * Thread groups
  */
@@ -2530,6 +2689,10 @@ static const struct pid_entry tgid_base_stuff[] = {
    LNK("root",       proc_root_link),
    LNK("exe",        proc_exe_link),
    REG("mounts",     S_IRUGO, proc_mounts_operations),
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+    REG("pages_per_device",     S_IRUGO, proc_cpc_tsk_ppd_ops),
+    REG("max_limit",     S_IRUGO, proc_cpc_tsk_limit_ops),
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
    REG("mountinfo",  S_IRUGO, proc_mountinfo_operations),
    REG("mountstats", S_IRUSR, proc_mountstats_operations),
 #ifdef CONFIG_PROC_PAGE_MONITOR
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 79b8bba..eff0e4b 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -99,6 +99,24 @@ struct partition_meta_info {
    u8 volname[PARTITION_META_INFO_VOLNAMELTH];
 };

+ #ifdef CONFIG_CONTROL_PAGE_CACHE
+ /* Enables the debugging information */
+ /*#define CPC_DBG        1*/
+ /*#define CPC_DBG_2        1*/
+ #define CPC_MAX_PID_PER_PART    (100)
+ #define CPC_MAX_LIMIT            2147483647 /* 2GB*/
+ #define CPC_MAX_BUFFER            (12)
+ /**
+  * Control Page Cache Structure
+  */
+ struct cpc_struct{
+    int pid;
+    int tgid;
+    unsigned int actual_pages;
+    struct hlist_node pid_list;
+ };
+ #endif /* CONFIG_CONTROL_PAGE_CACHE */
+
 struct hd_struct {
    sector_t start_sect;
    /*
@@ -126,6 +144,13 @@ struct hd_struct {
 #endif
    atomic_t ref;
    struct rcu_head rcu_head;
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+    unsigned int total_pages;
+    long max_limit;
+    rwlock_t cpc_lock;
+    struct proc_dir_entry *proc_cpcfs;
+    struct hlist_head *pid_tbl;
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
 };

 #define GENHD_FL_REMOVABLE            1
@@ -190,7 +215,7 @@ struct gendisk {
    void *private_data;

    int flags;
-    struct device *driverfs_dev;  // FIXME: remove
+    struct device *driverfs_dev;  /* FIXME: remove*/
    struct kobject *slave_dir;

    struct timer_rand_state *random;
@@ -633,6 +658,17 @@ extern ssize_t part_fail_store(struct device *dev,
                   struct device_attribute *attr,
                   const char *buf, size_t count);
 #endif /* CONFIG_FAIL_MAKE_REQUEST */
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+int cpc_oom_kill_task(struct task_struct *p);
+void cpc_pages_per_device(int pid);
+void cpc_inc(struct address_space *mapping);
+void cpc_dec(struct address_space *mapping);
+int cpc_add_to_thread(struct address_space *mapping, struct task_struct *tsk);
+int cpc_del_from_thread(struct address_space *mapping, struct task_struct *tsk);
+void cpc_del_part(struct hd_struct *part);
+void cpc_init_part(struct hd_struct *part);
+void cpc_shift_acct(struct task_struct *tsk);
+#endif /* CONFIG_CONTROL_PAGE_CACHE */

 static inline void hd_ref_init(struct hd_struct *part)
 {
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index c20635c..d108506 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -173,10 +173,14 @@ void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
 void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
                unsigned long);

+#ifndef CONFIG_CONTROL_PAGE_CACHE
 static inline void page_dup_rmap(struct page *page)
 {
    atomic_inc(&page->_mapcount);
 }
+#else /* CONFIG_CONTROL_PAGE_CACHE */
+void page_dup_rmap(struct page *page);
+#endif /*CONFIG_CONTROL_PAGE_CACHE */

 /*
  * Called from mm/vmscan.c to handle paging out
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d211247..a0aceb2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1605,6 +1605,11 @@ struct task_struct {
 #ifdef CONFIG_UPROBES
    struct uprobe_task *utask;
 #endif
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+    long max_limit;
+    int total_pages;
+    int dup;
+#endif
 };

 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/kernel/fork.c b/kernel/fork.c
index c535f33..d693159 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -206,6 +206,10 @@ static void account_kernel_stack(struct thread_info *ti, int account)

 void free_task(struct task_struct *tsk)
 {
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+    if (tsk->pid != tsk->tgid)
+        cpc_shift_acct(tsk);
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
    account_kernel_stack(tsk->stack, -1);
    arch_release_thread_info(tsk->stack);
    free_thread_info(tsk->stack);
@@ -1342,8 +1346,28 @@ static struct task_struct *copy_process(unsigned long clone_flags,

    p->pid = pid_nr(pid);
    p->tgid = p->pid;
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+    /* Check this: while duplicating the mm we did not yet
+     * have the pid and task structure of the task being
+     * created; we have it here, so account the duplicated
+     * pages both in total_pages and in dup.
+     */
+    p->dup = 0;
+    p->max_limit = -1;
+    p->total_pages = current->dup;
+    p->dup = current->dup;
+    current->dup = 0;
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
    if (clone_flags & CLONE_THREAD)
        p->tgid = current->tgid;
+#ifdef CPC_DBG
+        printk(KERN_EMERG"[%s] created %d by %d dup %d\n", __FUNCTION__,\
+            p->pid, current->pid, p->dup);
+        printk(KERN_EMERG"[%s] p=%s c=%s %d is %s\n", __FUNCTION__,\
+            current->comm, p->comm, p->pid, (clone_flags & CLONE_THREAD) ? "Thread" : "process");
+#endif

    p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
    /*
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 67604e5..6ecfede 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -763,6 +763,16 @@ config DEBUG_BUGVERBOSE
      of the BUG call as well as the EIP and oops trace.  This aids
      debugging but costs about 70-100K of memory.

+config CONTROL_PAGE_CACHE
+    bool "Control Page Cache"
+    depends on DEBUG_KERNEL
+    help
+      Provide page cache control for each device.
+      Accounting of pages in the cache will be maintained and
+      exposed through a proc interface (e.g. pages_per_device).
+      A maximum page cache limit can be set for each device and,
+      once the limit is reached, the OOM killer will be invoked
+      for that device.
+
 config DEBUG_INFO
    bool "Compile the kernel with debug info"
    depends on DEBUG_KERNEL
diff --git a/mm/filemap.c b/mm/filemap.c
index 83efee7..dc4d9d9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -34,6 +34,9 @@
 #include <linux/memcontrol.h>
 #include <linux/cleancache.h>
 #include "internal.h"
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+#include <linux/mm_inline.h>
+#endif /* CONFIG_CONTROL_PAGE_CACHE */

 /*
  * FIXME: remove all knowledge of the buffer layer from the core VM
@@ -128,6 +131,10 @@ void __delete_from_page_cache(struct page *page)
    /* Leave page->index set: truncation lookup relies upon it */
    mapping->nrpages--;
    __dec_zone_page_state(page, NR_FILE_PAGES);
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+    if (page_is_file_cache(page))
+        cpc_dec(mapping);
+#endif /*CONFIG_CONTROL_PAGE_CACHE */
    if (PageSwapBacked(page))
        __dec_zone_page_state(page, NR_SHMEM);
    BUG_ON(page_mapped(page));
@@ -463,6 +470,10 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
        if (likely(!error)) {
            mapping->nrpages++;
            __inc_zone_page_state(page, NR_FILE_PAGES);
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+            if (page_is_file_cache(page))
+                cpc_inc(mapping);
+#endif /*CONFIG_CONTROL_PAGE_CACHE */
            spin_unlock_irq(&mapping->tree_lock);
        } else {
            page->mapping = NULL;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 0399f14..9aff3a7 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -392,6 +392,23 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
        dump_tasks(memcg, nodemask);
 }

+#ifdef CONFIG_CONTROL_PAGE_CACHE
+/**
+ * cpc_oom_kill_task
+ *        Invoke OOM on the task.
+ * @p: task structure.
+ *
+ * RETURNS:
+ *  non-zero: on failure
+ *  0: on success
+ *
+ */
+int cpc_oom_kill_task(struct task_struct *p)
+{
+    return do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
+}
+EXPORT_SYMBOL(cpc_oom_kill_task);
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
 #define K(x) ((x) << (PAGE_SHIFT-10))
 /*
  * Must be called while holding a reference to p, which will be released upon
diff --git a/mm/rmap.c b/mm/rmap.c
index 2c78f8c..1d19765 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -61,6 +61,10 @@
 #include <asm/tlbflush.h>

 #include "internal.h"
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+#include <linux/genhd.h>
+#include <linux/mm_inline.h>
+#endif /* CONFIG_CONTROL_PAGE_CACHE */

 static struct kmem_cache *anon_vma_cachep;
 static struct kmem_cache *anon_vma_chain_cachep;
@@ -1116,7 +1120,59 @@ void page_add_file_rmap(struct page *page)
        mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
    }
    mem_cgroup_end_update_page_stat(page, &locked, &flags);
+
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+        if (page_is_file_cache(page)) {
+            if (current->pid == current->tgid) {
+                current->total_pages++;
+            } else {
+                if (current->group_leader) {
+                    get_task_struct(current->group_leader);
+                    current->group_leader->total_pages++;
+                    put_task_struct(current->group_leader);
+                }
+            }
+            cpc_add_to_thread(page->mapping, current);
+#ifdef CPC_DBG
+            printk(KERN_EMERG"[%s]AddingA %d P[0x%x,C%d]\n", \
+                __FUNCTION__, current->pid, page_address(page), \
+                page_mapcount(page));
+#endif /*CPC_DBG */
+        }
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
+}
+
+#ifdef CONFIG_CONTROL_PAGE_CACHE
+/**
+ * page_dup_rmap - duplicate pte mapping to a page
+ * @page:    the page to add the mapping to
+ *
+ * For copy_page_range only: minimal extract from page_add_file_rmap /
+ * page_add_anon_rmap, avoiding unnecessary tests (already checked) so it's
+ * quicker.
+ *
+ * The caller needs to hold the pte lock.
+ * MS: moved from rmap.h, where it was inline, so that the dup
+ *     accounting can be done here.
+ */
+
+void page_dup_rmap(struct page *page)
+{
+
+    atomic_inc(&page->_mapcount);
+    if (page_is_file_cache(page)) {
+#ifdef CPC_DBG
+        printk(KERN_EMERG"[%s]DUP P[0x%x,C%d] %d\n",\
+            __FUNCTION__, page_address(page), page_mapcount(page),\
+            current->pid);
+#endif
+        current->dup++;
+    }
 }
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
+

 /**
  * page_remove_rmap - take down pte mapping from a page
@@ -1139,10 +1195,49 @@ void page_remove_rmap(struct page *page)
    if (!anon)
        mem_cgroup_begin_update_page_stat(page, &locked, &flags);

+#ifndef CONFIG_CONTROL_PAGE_CACHE
    /* page still mapped by someone else? */
    if (!atomic_add_negative(-1, &page->_mapcount))
        goto out;
-
+#else /* CONFIG_CONTROL_PAGE_CACHE */
+    if (!atomic_add_negative(-1, &page->_mapcount)) {
+        if (page_is_file_cache(page)) {
+            if (current->pid == current->tgid) {
+                current->total_pages--;
+            } else {
+                if (current->group_leader) {
+                    get_task_struct(current->group_leader);
+                    current->group_leader->total_pages--;
+                    put_task_struct(current->group_leader);
+                }
+            }
+            cpc_del_from_thread(page->mapping, current);
+#ifdef CPC_DBG
+            printk(KERN_EMERG"[%s]RemovingS %d P[0x%x,C%d]\n",\
+                __FUNCTION__, current->pid, page_address(page),\
+                page_mapcount(page));
+#endif /* CPC_DBG */
+        }
+        goto out;
+    }
+    if (page_is_file_cache(page)) {
+        if (current->pid == current->tgid) {
+            current->total_pages--;
+        } else {
+            if (current->group_leader) {
+                get_task_struct(current->group_leader);
+                current->group_leader->total_pages--;
+                put_task_struct(current->group_leader);
+            }
+        }
+        cpc_del_from_thread(page->mapping, current);
+#ifdef CPC_DBG
+        printk(KERN_EMERG"[%s]RemovingA %d P[0x%x,C%d]\n",\
+            __FUNCTION__, current->pid, page_address(page),\
+            page_mapcount(page));
+#endif /* CPC_DBG */
+    }
+#endif /* CONFIG_CONTROL_PAGE_CACHE */
    /*
     * Now that the last pte has gone, s390 must transfer dirty
     * flag from storage key to struct page.  We can usually skip
--
1.7.9.5

