Re: [PATCH][BUGFIX] cgroups: fix pid namespace bug

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 1 Jul 2009 18:36:36 -0700Paul Menage <menage@xxxxxxxxxx> wrote:
> Thanks Li - but as I said to Serge in the email when I brought this up> originally, I already had a patch in mind for this; I've had an intern> (Ben) at Google working on it. His patch (pretty much ready, and being> sent out tomorrow I hope) is pretty similar to yours, but his is on> top of another patch that provides a (currently read-only)> "cgroup.procs" file in each cgroup dir that lists the unique tgids in> the cgroup. So the key in the list of pid arrays is actually a pid_ns> and a file type (indicating procs or tasks) rather than just a pid_ns.> Wow, I welcome "procs" file :)
Off-topic:Is there relationship betweem mm->owner and tgid ?If no, is it difficult to synchronize tgid and mm->owner ?
THanks,-Kame

> So I think it makes more sense to not use this patch, but to use the> pair of patches that Ben's written, since they provide more overal> functionality.> > Paul> > On Wed, Jul 1, 2009 at 6:24 PM, Li Zefan<lizf@xxxxxxxxxxxxxx> wrote:> > The bug was introduced by commit cc31edceee04a7b87f2be48f9489ebb72d264844> > ("cgroups: convert tasks file to use a seq_file with shared pid array").> >> > We cache a pid array for all threads that are opening the same "tasks"> > file, but the pids in the array are always from the namespace of the> > last process that opened the file, so all other threads will read pids> > from that namespace instead of their own namespaces.> >> > To fix it, we maintain a list of pid arrays, which is keyed by pid_ns.> > The list will be of length 1 at most time.> >> > Reported-by: Paul Menage <menage@xxxxxxxxxx>> > Idea-by: Paul Menage <menage@xxxxxxxxxx>> > Signed-off-by: Li Zefan <lizf@xxxxxxxxxxxxxx>> > ---> >  include/linux/cgroup.h |   11 ++----> >  kernel/cgroup.c        |   94 +++++++++++++++++++++++++++++++++++------------> >  2 files changed, 74 insertions(+), 31 deletions(-)> >> > diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h> > index 665fa70..20411d2 100644> > --- a/include/linux/cgroup.h> > +++ b/include/linux/cgroup.h> > @@ -179,14 +179,11 @@ struct cgroup {> >         */> >        struct list_head release_list;> >> > -       /* pids_mutex protects the fields below */> > +       /* pids_mutex protects pids_list and cached pid arrays. */> >        struct rw_semaphore pids_mutex;> > -       /* Array of process ids in the cgroup */> > -       pid_t *tasks_pids;> > -       /* How many files are using the current tasks_pids array */> > -       int pids_use_count;> > -       /* Length of the current tasks_pids array */> > -       int pids_length;> > +> > +       /* Linked list of struct cgroup_pids */> > +       struct list_head pids_list;> >> >        /* For RCU-protected deletion */> >        struct rcu_head rcu_head;> > diff --git a/kernel/cgroup.c b/kernel/cgroup.c> > index 3737a68..13dddb4 100644> > --- a/kernel/cgroup.c> > +++ b/kernel/cgroup.c> > @@ -47,6 +47,7 @@> >  #include <linux/hash.h>> >  #include <linux/namei.h>> >  #include <linux/smp_lock.h>> > +#include <linux/pid_namespace.h>> >> >  #include <asm/atomic.h>> >> > @@ -960,6 +961,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)> >        INIT_LIST_HEAD(&cgrp->children);> >        INIT_LIST_HEAD(&cgrp->css_sets);> >        INIT_LIST_HEAD(&cgrp->release_list);> > +       INIT_LIST_HEAD(&cgrp->pids_list);> >        init_rwsem(&cgrp->pids_mutex);> >  }> >  static void init_cgroup_root(struct cgroupfs_root *root)> > @@ -2201,12 +2203,30 @@ err:> >        return ret;> >  }> >> > +/*> > + * Cache pids for all threads in the same pid namespace that are> > + * opening the same "tasks" file.> > + */> > +struct cgroup_pids {> > +       /* The node in cgrp->pids_list */> > +       struct list_head list;> > +       /* The cgroup those pids belong to */> > +       struct cgroup *cgrp;> > +       /* The namepsace those pids belong to */> > +       struct pid_namespace *pid_ns;> > +       /* Array of process ids in the cgroup */> > +       pid_t *tasks_pids;> > +       /* How many files are using the this tasks_pids array */> > +       int pids_use_count;> > +       /* Length of the current tasks_pids array */> > +       int pids_length;> > +};> > +> >  static int cmppid(const void *a, const void *b)> >  {> >        return *(pid_t *)a - *(pid_t *)b;> >  }> >> > -> >  /*> >  * seq_file methods for the "tasks" file. The seq_file position is the> >  * next pid to display; the seq_file iterator is a pointer to the pid> > @@ -2221,45 +2241,47 @@ static void *cgroup_tasks_start(struct seq_file *s, loff_t *pos)> >         * after a seek to the start). Use a binary-search to find the> >         * next pid to display, if any> >         */> > -       struct cgroup *cgrp = s->private;> > +       struct cgroup_pids *cp = s->private;> > +       struct cgroup *cgrp = cp->cgrp;> >        int index = 0, pid = *pos;> >        int *iter;> >> >        down_read(&cgrp->pids_mutex);> >        if (pid) {> > -               int end = cgrp->pids_length;> > +               int end = cp->pids_length;> >> >                while (index < end) {> >                        int mid = (index + end) / 2;> > -                       if (cgrp->tasks_pids[mid] == pid) {> > +                       if (cp->tasks_pids[mid] == pid) {> >                                index = mid;> >                                break;> > -                       } else if (cgrp->tasks_pids[mid] <= pid)> > +                       } else if (cp->tasks_pids[mid] <= pid)> >                                index = mid + 1;> >                        else> >                                end = mid;> >                }> >        }> >        /* If we're off the end of the array, we're done */> > -       if (index >= cgrp->pids_length)> > +       if (index >= cp->pids_length)> >                return NULL;> >        /* Update the abstract position to be the actual pid that we found */> > -       iter = cgrp->tasks_pids + index;> > +       iter = cp->tasks_pids + index;> >        *pos = *iter;> >        return iter;> >  }> >> >  static void cgroup_tasks_stop(struct seq_file *s, void *v)> >  {> > -       struct cgroup *cgrp = s->private;> > +       struct cgroup_pids *cp = s->private;> > +       struct cgroup *cgrp = cp->cgrp;> >        up_read(&cgrp->pids_mutex);> >  }> >> >  static void *cgroup_tasks_next(struct seq_file *s, void *v, loff_t *pos)> >  {> > -       struct cgroup *cgrp = s->private;> > +       struct cgroup_pids *cp = s->private;> >        int *p = v;> > -       int *end = cgrp->tasks_pids + cgrp->pids_length;> > +       int *end = cp->tasks_pids + cp->pids_length;> >> >        /*> >         * Advance to the next pid in the array. If this goes off the> > @@ -2286,26 +2308,32 @@ static struct seq_operations cgroup_tasks_seq_operations = {> >        .show = cgroup_tasks_show,> >  };> >> > -static void release_cgroup_pid_array(struct cgroup *cgrp)> > +static void release_cgroup_pid_array(struct cgroup_pids *cp)> >  {> > +       struct cgroup *cgrp = cp->cgrp;> > +> >        down_write(&cgrp->pids_mutex);> > -       BUG_ON(!cgrp->pids_use_count);> > -       if (!--cgrp->pids_use_count) {> > -               kfree(cgrp->tasks_pids);> > -               cgrp->tasks_pids = NULL;> > -               cgrp->pids_length = 0;> > +       BUG_ON(!cp->pids_use_count);> > +       if (!--cp->pids_use_count) {> > +               list_del(&cp->list);> > +               kfree(cp->tasks_pids);> > +               kfree(cp);> >        }> >        up_write(&cgrp->pids_mutex);> >  }> >> >  static int cgroup_tasks_release(struct inode *inode, struct file *file)> >  {> > -       struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);> > +       struct seq_file *seq;> > +       struct cgroup_pids *cp;> >> >        if (!(file->f_mode & FMODE_READ))> >                return 0;> >> > -       release_cgroup_pid_array(cgrp);> > +       seq = file->private_data;> > +       cp = seq->private;> > +> > +       release_cgroup_pid_array(cp);> >        return seq_release(inode, file);> >  }> >> > @@ -2324,6 +2352,8 @@ static struct file_operations cgroup_tasks_operations = {> >  static int cgroup_tasks_open(struct inode *unused, struct file *file)> >  {> >        struct cgroup *cgrp = __d_cgrp(file->f_dentry->d_parent);> > +       struct pid_namespace *pid_ns = task_active_pid_ns(current);> > +       struct cgroup_pids *cp;> >        pid_t *pidarray;> >        int npids;> >        int retval;> > @@ -2350,20 +2380,36 @@ static int cgroup_tasks_open(struct inode *unused, struct file *file)> >         * array if necessary> >         */> >        down_write(&cgrp->pids_mutex);> > -       kfree(cgrp->tasks_pids);> > -       cgrp->tasks_pids = pidarray;> > -       cgrp->pids_length = npids;> > -       cgrp->pids_use_count++;> > +> > +       list_for_each_entry(cp, &cgrp->pids_list, list) {> > +               if (pid_ns == cp->pid_ns)> > +                       goto found;> > +       }> > +> > +       cp = kzalloc(sizeof(*cp), GFP_KERNEL);> > +       if (!cp) {> > +               up_write(&cgrp->pids_mutex);> > +               kfree(pidarray);> > +               return -ENOMEM;> > +       }> > +       cp->cgrp = cgrp;> > +       cp->pid_ns = pid_ns;> > +       list_add(&cp->list, &cgrp->pids_list);> > +found:> > +       kfree(cp->tasks_pids);> > +       cp->tasks_pids = pidarray;> > +       cp->pids_length = npids;> > +       cp->pids_use_count++;> >        up_write(&cgrp->pids_mutex);> >> >        file->f_op = &cgroup_tasks_operations;> >> >        retval = seq_open(file, &cgroup_tasks_seq_operations);> >        if (retval) {> > -               release_cgroup_pid_array(cgrp);> > +               release_cgroup_pid_array(cp);> >                return retval;> >        }> > -       ((struct seq_file *)file->private_data)->private = cgrp;> > +       ((struct seq_file *)file->private_data)->private = cp;> >        return 0;> >  }> >> > --> > 1.5.4.rc3> >> _______________________________________________> Containers mailing list> Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx> https://lists.linux-foundation.org/mailman/listinfo/containers> 
_______________________________________________Containers mailing listContainers@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx://lists.linux-foundation.org/mailman/listinfo/containers


[Index of Archives]     [Cgroups]     [Netdev]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux