Re: [PATCH] libceph: allow custom network namespaces

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Sorry, I missed the __sock_create hunk during my merge from our 3.13
kernel tree.
I have now added it and re-tested rbd map/write/read/unmap in a docker container.
I'll post the updated patch.

On Wed, Jun 10, 2015 at 9:30 PM, Ilya Dryomov <idryomov@xxxxxxxxx> wrote:
> On Wed, Jun 10, 2015 at 4:01 PM, Hong Zhiguo <honkiko@xxxxxxxxx> wrote:
>> In the current implementation, init_net is always used.
>>
>> But in most cases, if a user does an rbd map or ceph mount in
>> a container, it is expected to use the container's network namespace.
>>
>> This patch saves the container's netns in ceph_options on a rbd map
>> or ceph mount. And use the netns other than init_net when creating
>> socket. Ref count of the netns is only taken by the ceph_options
>> in ceph_client since lifetime of osds and mon is within that of
>> ceph_client.
>>
>> I've tested this patch in a docker container with the following operations:
>> - rbd map
>> - write/read on the rbd
>> - rbd unmap
>>
>> Signed-off-by: Hong Zhiguo <zhiguohong@xxxxxxxxxxx>
>> ---
>>  fs/ceph/mds_client.c           | 3 ++-
>>  include/linux/ceph/libceph.h   | 3 +++
>>  include/linux/ceph/messenger.h | 4 +++-
>>  net/ceph/ceph_common.c         | 7 ++++---
>>  net/ceph/messenger.c           | 8 +++++++-
>>  net/ceph/mon_client.c          | 2 +-
>>  net/ceph/osd_client.c          | 3 ++-
>>  7 files changed, 22 insertions(+), 8 deletions(-)
>>
>> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
>> index 8080d48..3fb0976 100644
>> --- a/fs/ceph/mds_client.c
>> +++ b/fs/ceph/mds_client.c
>> @@ -440,7 +440,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
>>         s->s_seq = 0;
>>         mutex_init(&s->s_mutex);
>>
>> -       ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
>> +       ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr,
>> +                       mdsc->fsc->client->options->netns);
>>
>>         spin_lock_init(&s->s_gen_ttl_lock);
>>         s->s_cap_gen = 0;
>> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
>> index d73a569..442d9f3 100644
>> --- a/include/linux/ceph/libceph.h
>> +++ b/include/linux/ceph/libceph.h
>> @@ -22,6 +22,8 @@
>>  #include <linux/ceph/osd_client.h>
>>  #include <linux/ceph/ceph_fs.h>
>>
>> +struct net;
>> +
>>  /*
>>   * mount options
>>   */
>> @@ -46,6 +48,7 @@ struct ceph_options {
>>         unsigned long mount_timeout;            /* jiffies */
>>         unsigned long osd_idle_ttl;             /* jiffies */
>>         unsigned long osd_keepalive_timeout;    /* jiffies */
>> +       struct net *netns;
>>
>>         /*
>>          * any type that can't be simply compared or doesn't need need
>> diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
>> index e154994..3b0a314 100644
>> --- a/include/linux/ceph/messenger.h
>> +++ b/include/linux/ceph/messenger.h
>> @@ -14,6 +14,7 @@
>>
>>  struct ceph_msg;
>>  struct ceph_connection;
>> +struct net;
>>
>>  /*
>>   * Ceph defines these callbacks for handling connection events.
>> @@ -189,6 +190,7 @@ struct ceph_connection {
>>         struct ceph_messenger *msgr;
>>
>>         atomic_t sock_state;
>> +       struct net *netns;
>>         struct socket *sock;
>>         struct ceph_entity_addr peer_addr; /* peer address */
>>         struct ceph_entity_addr peer_addr_for_me;
>> @@ -270,7 +272,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr,
>>
>>  extern void ceph_con_init(struct ceph_connection *con, void *private,
>>                         const struct ceph_connection_operations *ops,
>> -                       struct ceph_messenger *msgr);
>> +                       struct ceph_messenger *msgr, struct net *netns);
>>  extern void ceph_con_open(struct ceph_connection *con,
>>                           __u8 entity_type, __u64 entity_num,
>>                           struct ceph_entity_addr *addr);
>> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
>> index 925d0c8..1c42d96 100644
>> --- a/net/ceph/ceph_common.c
>> +++ b/net/ceph/ceph_common.c
>> @@ -269,6 +269,9 @@ static match_table_t opt_tokens = {
>>  void ceph_destroy_options(struct ceph_options *opt)
>>  {
>>         dout("destroy_options %p\n", opt);
>> +       if (opt->netns) {
>> +               put_net(opt->netns);
>> +       }
>>         kfree(opt->name);
>>         if (opt->key) {
>>                 ceph_crypto_key_destroy(opt->key);
>> @@ -335,9 +338,6 @@ ceph_parse_options(char *options, const char *dev_name,
>>         int err = -ENOMEM;
>>         substring_t argstr[MAX_OPT_ARGS];
>>
>> -       if (current->nsproxy->net_ns != &init_net)
>> -               return ERR_PTR(-EINVAL);
>> -
>>         opt = kzalloc(sizeof(*opt), GFP_KERNEL);
>>         if (!opt)
>>                 return ERR_PTR(-ENOMEM);
>> @@ -501,6 +501,7 @@ ceph_parse_options(char *options, const char *dev_name,
>>         }
>>
>>         /* success */
>> +       opt->netns = get_net(current->nsproxy->net_ns);
>>         return opt;
>>
>>  out:
>> diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
>> index 967080a..43ec07d 100644
>> --- a/net/ceph/messenger.c
>> +++ b/net/ceph/messenger.c
>> @@ -736,7 +736,7 @@ bool ceph_con_opened(struct ceph_connection *con)
>>   */
>>  void ceph_con_init(struct ceph_connection *con, void *private,
>>         const struct ceph_connection_operations *ops,
>> -       struct ceph_messenger *msgr)
>> +       struct ceph_messenger *msgr, struct net *netns)
>>  {
>>         dout("con_init %p\n", con);
>>         memset(con, 0, sizeof(*con));
>> @@ -744,6 +744,12 @@ void ceph_con_init(struct ceph_connection *con, void *private,
>>         con->ops = ops;
>>         con->msgr = msgr;
>>
>> +       /*
>> +        * don't take extra refcnt of netns here since both mon and osds
>> +        * have lifetime within that of ceph_client
>> +        */
>> +       con->netns = netns;
>> +
>>         con_sock_state_init(con);
>>
>>         mutex_init(&con->mutex);
>> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
>> index 9d6ff12..04128af 100644
>> --- a/net/ceph/mon_client.c
>> +++ b/net/ceph/mon_client.c
>> @@ -832,7 +832,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl)
>>                 goto out_auth_reply;
>>
>>         ceph_con_init(&monc->con, monc, &mon_con_ops,
>> -                     &monc->client->msgr);
>> +                     &monc->client->msgr, monc->client->options->netns);
>>
>>         monc->cur_mon = -1;
>>         monc->hunting = true;
>> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
>> index 5003367..32d9fa9 100644
>> --- a/net/ceph/osd_client.c
>> +++ b/net/ceph/osd_client.c
>> @@ -1022,7 +1022,8 @@ static struct ceph_osd *create_osd(struct ceph_osd_client *osdc, int onum)
>>         INIT_LIST_HEAD(&osd->o_osd_lru);
>>         osd->o_incarnation = 1;
>>
>> -       ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
>> +       ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr,
>> +                       osdc->client->options->netns);
>>
>>         INIT_LIST_HEAD(&osd->o_keepalive_item);
>>         return osd;
>
> It seems to me your patch boils down to killing the init_ns check and
> adding a netns field to struct ceph_connection, which is assigned to
> but never used.  Given that, can you elaborate on the "And use the
> netns other than init_net when creating socket" part and explain in
> a little bit more detail what is accomplished here?
>
> Thanks,
>
>                 Ilya



-- 
best regards
Hong Zhiguo
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]
  Powered by Linux