On Mon, Sep 16, 2019 at 06:45:48PM +0000, Jason Gunthorpe wrote: > On Mon, Sep 16, 2019 at 10:11:51AM +0300, Leon Romanovsky wrote: > > From: Jack Morgenstein <jackm@xxxxxxxxxxxxxxxxxx> > > > > In the process of moving the debug counters sysfs entries, the commit > > mentioned below eliminated the cm_infiniband sysfs directory. > > > > This sysfs directory was tied to the cm_port object allocated in procedure > > cm_add_one(). > > > > Before the commit below, this cm_port object was freed via a call to > > kobject_put(port->kobj) in procedure cm_remove_port_fs(). > > > > Since port no longer uses its kobj, kobject_put(port->kobj) was eliminated. > > This, however, meant that kfree was never called for the cm_port buffers. > > > > Fix this by adding explicit kfree(port) calls to functions cm_add_one() > > and cm_remove_one(). > > > > Note: the kfree call in the first chunk below (in the cm_add_one error > > flow) fixes an old, undetected memory leak. > > > > Fixes: c87e65cfb97c ("RDMA/cm: Move debug counters to be under relevant IB device") > > Signed-off-by: Jack Morgenstein <jackm@xxxxxxxxxxxxxxxxxx> > > Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> > > drivers/infiniband/core/cm.c | 3 +++ > > 1 file changed, 3 insertions(+) > > > > diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c > > index da10e6ccb43c..5920c0085d35 100644 > > +++ b/drivers/infiniband/core/cm.c > > @@ -4399,6 +4399,7 @@ static void cm_add_one(struct ib_device *ib_device) > > error1: > > port_modify.set_port_cap_mask = 0; > > port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; > > + kfree(port); > > while (--i) { > > if (!rdma_cap_ib_cm(ib_device, i)) > > continue; > > @@ -4407,6 +4408,7 @@ static void cm_add_one(struct ib_device *ib_device) > > ib_modify_port(ib_device, port->port_num, 0, &port_modify); > > ib_unregister_mad_agent(port->mad_agent); > > cm_remove_port_fs(port); > > + kfree(port); > > } > > free: > > kfree(cm_dev); > > @@ -4460,6 +4462,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) > > spin_unlock_irq(&cm.state_lock); > > ib_unregister_mad_agent(cur_mad_agent); > > cm_remove_port_fs(port); > > + kfree(port); > > } > > This whole thing is looking pretty goofy now, and I suspect there are > more error unwind bugs here. > > How about this instead: It looks OK to me. Thanks > > From e8dad20c7b69436e63b18f16cd9457ea27da5bc1 Mon Sep 17 00:00:00 2001 > From: Jack Morgenstein <jackm@xxxxxxxxxxxxxxxxxx> > Date: Mon, 16 Sep 2019 10:11:51 +0300 > Subject: [PATCH] RDMA/cm: Fix memory leak in cm_add/remove_one > > In the process of moving the debug counters sysfs entries, the commit > mentioned below eliminated the cm_infiniband sysfs directory, and created > some missing cases where the port pointers were not being freed as the > kobject_put was also eliminated. > > Rework this to not allocate port pointers and consolidate all the error > unwind into one sequence. > > This also fixes unlikely racey bugs where error-unwind after unregistering > the MAD handler would miss flushing the WQ and other clean up that is > necessary once concurrency starts. > > Fixes: c87e65cfb97c ("RDMA/cm: Move debug counters to be under relevant IB device") > Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx> > --- > drivers/infiniband/core/cm.c | 187 ++++++++++++++++++----------------- > 1 file changed, 94 insertions(+), 93 deletions(-) > > diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c > index da10e6ccb43cd0..30a764e763dec1 100644 > --- a/drivers/infiniband/core/cm.c > +++ b/drivers/infiniband/core/cm.c > @@ -223,7 +223,7 @@ struct cm_device { > struct ib_device *ib_device; > u8 ack_delay; > int going_down; > - struct cm_port *port[0]; > + struct cm_port port[]; > }; > > struct cm_av { > @@ -520,7 +520,7 @@ get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) > read_lock_irqsave(&cm.device_lock, flags); > list_for_each_entry(cm_dev, &cm.device_list, list) { > if (cm_dev->ib_device == attr->device) { > - port = cm_dev->port[attr->port_num - 1]; > + port = &cm_dev->port[attr->port_num - 1]; > break; > } > } > @@ -539,7 +539,7 @@ get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr) > sa_conv_pathrec_to_gid_type(path), > NULL); > if (!IS_ERR(attr)) { > - port = cm_dev->port[attr->port_num - 1]; > + port = &cm_dev->port[attr->port_num - 1]; > break; > } > } > @@ -4319,23 +4319,99 @@ static void cm_remove_port_fs(struct cm_port *port) > > } > > -static void cm_add_one(struct ib_device *ib_device) > +static void cm_destroy_one_port(struct cm_device *cm_dev, unsigned int port_num) > { > - struct cm_device *cm_dev; > - struct cm_port *port; > + struct cm_port *port = &cm_dev->port[port_num - 1]; > + struct ib_port_modify port_modify = { > + .clr_port_cap_mask = IB_PORT_CM_SUP > + }; > + struct ib_mad_agent *cur_mad_agent; > + struct cm_id_private *cm_id_priv; > + > + if (!rdma_cap_ib_cm(cm_dev->ib_device, port_num)) > + return; > + > + ib_modify_port(cm_dev->ib_device, port_num, 0, &port_modify); > + > + /* Mark all the cm_id's as not valid */ > + spin_lock_irq(&cm.lock); > + list_for_each_entry (cm_id_priv, &port->cm_priv_altr_list, altr_list) > + cm_id_priv->altr_send_port_not_ready = 1; > + list_for_each_entry (cm_id_priv, &port->cm_priv_prim_list, prim_list) > + cm_id_priv->prim_send_port_not_ready = 1; > + spin_unlock_irq(&cm.lock); > + > + /* > + * We flush the queue here after the going_down set, this verifies > + * that no new works will be queued in the recv handler, after that we > + * can call the unregister_mad_agent > + */ > + flush_workqueue(cm.wq); > + > + spin_lock_irq(&cm.state_lock); > + cur_mad_agent = port->mad_agent; > + port->mad_agent = NULL; > + spin_unlock_irq(&cm.state_lock); > + > + if (cur_mad_agent) > + ib_unregister_mad_agent(cur_mad_agent); > + > + cm_remove_port_fs(port); > +} > + > +static int cm_init_one_port(struct cm_device *cm_dev, unsigned int port_num) > +{ > + struct cm_port *port = &cm_dev->port[port_num - 1]; > struct ib_mad_reg_req reg_req = { > .mgmt_class = IB_MGMT_CLASS_CM, > .mgmt_class_version = IB_CM_CLASS_VERSION, > }; > struct ib_port_modify port_modify = { > - .set_port_cap_mask = IB_PORT_CM_SUP > + .set_port_cap_mask = IB_PORT_CM_SUP, > }; > - unsigned long flags; > int ret; > + > + if (!rdma_cap_ib_cm(cm_dev->ib_device, port_num)) > + return 0; > + > + set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); > + > + port->cm_dev = cm_dev; > + port->port_num = port_num; > + > + INIT_LIST_HEAD(&port->cm_priv_prim_list); > + INIT_LIST_HEAD(&port->cm_priv_altr_list); > + > + ret = cm_create_port_fs(port); > + if (ret) > + return ret; > + > + port->mad_agent = > + ib_register_mad_agent(cm_dev->ib_device, port_num, IB_QPT_GSI, > + ®_req, 0, cm_send_handler, > + cm_recv_handler, port, 0); > + if (IS_ERR(port->mad_agent)) { > + cm_destroy_one_port(cm_dev, port_num); > + return PTR_ERR(port->mad_agent); > + } > + > + ret = ib_modify_port(cm_dev->ib_device, port_num, 0, &port_modify); > + if (ret) { > + cm_destroy_one_port(cm_dev, port_num); > + return ret; > + } > + > + return 0; > +} > + > +static void cm_add_one(struct ib_device *ib_device) > +{ > + struct cm_device *cm_dev; > + unsigned long flags; > int count = 0; > u8 i; > > - cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt), > + cm_dev = kvzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt), > GFP_KERNEL); > if (!cm_dev) > return; > @@ -4344,41 +4420,9 @@ static void cm_add_one(struct ib_device *ib_device) > cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay; > cm_dev->going_down = 0; > > - set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); > for (i = 1; i <= ib_device->phys_port_cnt; i++) { > - if (!rdma_cap_ib_cm(ib_device, i)) > - continue; > - > - port = kzalloc(sizeof *port, GFP_KERNEL); > - if (!port) > - goto error1; > - > - cm_dev->port[i-1] = port; > - port->cm_dev = cm_dev; > - port->port_num = i; > - > - INIT_LIST_HEAD(&port->cm_priv_prim_list); > - INIT_LIST_HEAD(&port->cm_priv_altr_list); > - > - ret = cm_create_port_fs(port); > - if (ret) > - goto error1; > - > - port->mad_agent = ib_register_mad_agent(ib_device, i, > - IB_QPT_GSI, > - ®_req, > - 0, > - cm_send_handler, > - cm_recv_handler, > - port, > - 0); > - if (IS_ERR(port->mad_agent)) > - goto error2; > - > - ret = ib_modify_port(ib_device, i, 0, &port_modify); > - if (ret) > - goto error3; > - > + if (!cm_init_one_port(cm_dev, i)) > + goto error; > count++; > } > > @@ -4392,35 +4436,16 @@ static void cm_add_one(struct ib_device *ib_device) > write_unlock_irqrestore(&cm.device_lock, flags); > return; > > -error3: > - ib_unregister_mad_agent(port->mad_agent); > -error2: > - cm_remove_port_fs(port); > -error1: > - port_modify.set_port_cap_mask = 0; > - port_modify.clr_port_cap_mask = IB_PORT_CM_SUP; > - while (--i) { > - if (!rdma_cap_ib_cm(ib_device, i)) > - continue; > - > - port = cm_dev->port[i-1]; > - ib_modify_port(ib_device, port->port_num, 0, &port_modify); > - ib_unregister_mad_agent(port->mad_agent); > - cm_remove_port_fs(port); > - } > +error: > + while (--i) > + cm_destroy_one_port(cm_dev, i); > free: > - kfree(cm_dev); > + kvfree(cm_dev); > } > > static void cm_remove_one(struct ib_device *ib_device, void *client_data) > { > struct cm_device *cm_dev = client_data; > - struct cm_port *port; > - struct cm_id_private *cm_id_priv; > - struct ib_mad_agent *cur_mad_agent; > - struct ib_port_modify port_modify = { > - .clr_port_cap_mask = IB_PORT_CM_SUP > - }; > unsigned long flags; > int i; > > @@ -4435,34 +4460,10 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) > cm_dev->going_down = 1; > spin_unlock_irq(&cm.lock); > > - for (i = 1; i <= ib_device->phys_port_cnt; i++) { > - if (!rdma_cap_ib_cm(ib_device, i)) > - continue; > - > - port = cm_dev->port[i-1]; > - ib_modify_port(ib_device, port->port_num, 0, &port_modify); > - /* Mark all the cm_id's as not valid */ > - spin_lock_irq(&cm.lock); > - list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list) > - cm_id_priv->altr_send_port_not_ready = 1; > - list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list) > - cm_id_priv->prim_send_port_not_ready = 1; > - spin_unlock_irq(&cm.lock); > - /* > - * We flush the queue here after the going_down set, this > - * verify that no new works will be queued in the recv handler, > - * after that we can call the unregister_mad_agent > - */ > - flush_workqueue(cm.wq); > - spin_lock_irq(&cm.state_lock); > - cur_mad_agent = port->mad_agent; > - port->mad_agent = NULL; > - spin_unlock_irq(&cm.state_lock); > - ib_unregister_mad_agent(cur_mad_agent); > - cm_remove_port_fs(port); > - } > + for (i = 1; i <= ib_device->phys_port_cnt; i++) > + cm_destroy_one_port(cm_dev, i); > > - kfree(cm_dev); > + kvfree(cm_dev); > } > > static int __init ib_cm_init(void) > -- > 2.23.0 >