On Tuesday, July 23, 2013 04:09:26 PM Lv Zheng wrote: > This patch uses reference counting to fix the race caused by the > unprotected ACPI IPMI user. > > As the acpi_ipmi_device->user_interface check in acpi_ipmi_space_handler() > can happen before setting user_interface to NULL and codes after the check > in acpi_ipmi_space_handler() can happen after user_interface becoming NULL, > then the on-going acpi_ipmi_space_handler() still can pass an invalid > acpi_ipmi_device->user_interface to ipmi_request_settime(). Such race > condition is not allowed by the IPMI layer's API design as crash will > happen in ipmi_request_settime(). > In IPMI layer, smi_gone()/new_smi() callbacks are protected by > smi_watchers_mutex, thus their invocations are serialized. But as a new > smi can re-use the freed intf_num, it requires that the callback > implementation must not use intf_num as an identification mean or it must > ensure all references to the previous smi are all dropped before exiting > smi_gone() callback. In case of acpi_ipmi module, this means > ipmi_flush_tx_msg() must ensure all on-going IPMI transfers are completed > before exiting ipmi_flush_tx_msg(). > > This patch follows ipmi_devintf.c design: > 1. Invoking ipmi_destroy_user() after the reference count of > acpi_ipmi_device dropping to 0, this matches IPMI layer's API calling > rule on ipmi_destroy_user() and ipmi_request_settime(). > 2. References of acpi_ipmi_device dropping to 1 means tx_msg related to > this acpi_ipmi_device are all freed, this can be used to implement the > new flushing mechanism. Note complete() must be retried so that the > on-going tx_msg won't block flushing at the point to add tx_msg into > tx_msg_list where reference of acpi_ipmi_device is held. This matches > the IPMI layer's callback rule on smi_gone()/new_smi() serialization. > 3. ipmi_flush_tx_msg() is performed after deleting acpi_ipmi_device from > the list so that no new tx_msg can be created after entering flushing > process. > 4. The flushing of tx_msg is also moved out of ipmi_lock in this patch. > > The forthcoming IPMI operation region handler installation changes also > requires acpi_ipmi_device be handled in the reference counting style. > > Authorship is also updated due to this design change. > > Signed-off-by: Lv Zheng <lv.zheng@xxxxxxxxx> > Cc: Zhao Yakui <yakui.zhao@xxxxxxxxx> > Reviewed-by: Huang Ying <ying.huang@xxxxxxxxx> > --- > drivers/acpi/acpi_ipmi.c | 249 +++++++++++++++++++++++++++------------------- > 1 file changed, 149 insertions(+), 100 deletions(-) > > diff --git a/drivers/acpi/acpi_ipmi.c b/drivers/acpi/acpi_ipmi.c > index 527ee43..cbf25e0 100644 > --- a/drivers/acpi/acpi_ipmi.c > +++ b/drivers/acpi/acpi_ipmi.c > @@ -1,8 +1,9 @@ > /* > * acpi_ipmi.c - ACPI IPMI opregion > * > - * Copyright (C) 2010 Intel Corporation > - * Copyright (C) 2010 Zhao Yakui <yakui.zhao@xxxxxxxxx> > + * Copyright (C) 2010, 2013 Intel Corporation > + * Author: Zhao Yakui <yakui.zhao@xxxxxxxxx> > + * Lv Zheng <lv.zheng@xxxxxxxxx> > * > * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ > * > @@ -67,6 +68,7 @@ struct acpi_ipmi_device { > long curr_msgid; > unsigned long flags; > struct ipmi_smi_info smi_data; > + atomic_t refcnt; Can you use a kref instead? > }; > > struct ipmi_driver_data { > @@ -107,8 +109,8 @@ struct acpi_ipmi_buffer { > static void ipmi_register_bmc(int iface, struct device *dev); > static void ipmi_bmc_gone(int iface); > static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data); > -static void acpi_add_ipmi_device(struct acpi_ipmi_device *ipmi_device); > -static void acpi_remove_ipmi_device(struct acpi_ipmi_device *ipmi_device); > +static int ipmi_install_space_handler(struct acpi_ipmi_device *ipmi); > +static void ipmi_remove_space_handler(struct acpi_ipmi_device *ipmi); > > static struct ipmi_driver_data driver_data = { > .ipmi_devices = LIST_HEAD_INIT(driver_data.ipmi_devices), > @@ -122,6 +124,80 @@ static struct ipmi_driver_data driver_data = { > }, > }; > > +static struct acpi_ipmi_device * > +ipmi_dev_alloc(int iface, struct ipmi_smi_info *smi_data, acpi_handle handle) > +{ > + struct acpi_ipmi_device *ipmi_device; > + int err; > + ipmi_user_t user; > + > + ipmi_device = kzalloc(sizeof(*ipmi_device), GFP_KERNEL); > + if (!ipmi_device) > + return NULL; > + > + atomic_set(&ipmi_device->refcnt, 1); > + INIT_LIST_HEAD(&ipmi_device->head); > + INIT_LIST_HEAD(&ipmi_device->tx_msg_list); > + spin_lock_init(&ipmi_device->tx_msg_lock); > + > + ipmi_device->handle = handle; > + ipmi_device->pnp_dev = to_pnp_dev(get_device(smi_data->dev)); > + memcpy(&ipmi_device->smi_data, smi_data, sizeof(struct ipmi_smi_info)); > + ipmi_device->ipmi_ifnum = iface; > + > + err = ipmi_create_user(iface, &driver_data.ipmi_hndlrs, > + ipmi_device, &user); > + if (err) { > + put_device(smi_data->dev); > + kfree(ipmi_device); > + return NULL; > + } > + ipmi_device->user_interface = user; > + ipmi_install_space_handler(ipmi_device); > + > + return ipmi_device; > +} > + > +static struct acpi_ipmi_device * > +acpi_ipmi_dev_get(struct acpi_ipmi_device *ipmi_device) > +{ > + if (ipmi_device) > + atomic_inc(&ipmi_device->refcnt); > + return ipmi_device; > +} > + > +static void ipmi_dev_release(struct acpi_ipmi_device *ipmi_device) > +{ > + ipmi_remove_space_handler(ipmi_device); > + ipmi_destroy_user(ipmi_device->user_interface); > + put_device(ipmi_device->smi_data.dev); > + kfree(ipmi_device); > +} > + > +static void acpi_ipmi_dev_put(struct acpi_ipmi_device *ipmi_device) > +{ > + if (ipmi_device && atomic_dec_and_test(&ipmi_device->refcnt)) > + ipmi_dev_release(ipmi_device); > +} > + > +static struct acpi_ipmi_device *acpi_ipmi_get_targeted_smi(int iface) > +{ > + int dev_found = 0; > + struct acpi_ipmi_device *ipmi_device; > + Why don't you do struct acpi_ipmi_device *ipmi_device, *ret = NULL; and then -> > + mutex_lock(&driver_data.ipmi_lock); > + list_for_each_entry(ipmi_device, &driver_data.ipmi_devices, head) { > + if (ipmi_device->ipmi_ifnum == iface) { -> ret = ipmi_device; -> > + dev_found = 1; > + acpi_ipmi_dev_get(ipmi_device); > + break; > + } > + } > + mutex_unlock(&driver_data.ipmi_lock); > + > + return dev_found ? ipmi_device : NULL; -> return ret; > +} > + > static struct acpi_ipmi_msg *acpi_alloc_ipmi_msg(struct acpi_ipmi_device *ipmi) > { > struct acpi_ipmi_msg *ipmi_msg; > @@ -228,25 +304,24 @@ static void acpi_format_ipmi_response(struct acpi_ipmi_msg *msg, > static void ipmi_flush_tx_msg(struct acpi_ipmi_device *ipmi) > { > struct acpi_ipmi_msg *tx_msg, *temp; > - int count = HZ / 10; > - struct pnp_dev *pnp_dev = ipmi->pnp_dev; > unsigned long flags; > > - spin_lock_irqsave(&ipmi->tx_msg_lock, flags); > - list_for_each_entry_safe(tx_msg, temp, &ipmi->tx_msg_list, head) { > - /* wake up the sleep thread on the Tx msg */ > - complete(&tx_msg->tx_complete); > - } > - spin_unlock_irqrestore(&ipmi->tx_msg_lock, flags); > - > - /* wait for about 100ms to flush the tx message list */ > - while (count--) { > - if (list_empty(&ipmi->tx_msg_list)) > - break; > - schedule_timeout(1); > + /* > + * NOTE: Synchronous Flushing > + * Wait until refnct dropping to 1 - no other users unless this > + * context. This function should always be called before > + * acpi_ipmi_device destruction. > + */ > + while (atomic_read(&ipmi->refcnt) > 1) { Isn't this racy? What if we see that the refcount is 1 and break the loop, but someone else bumps up the refcount at the same time? > + spin_lock_irqsave(&ipmi->tx_msg_lock, flags); > + list_for_each_entry_safe(tx_msg, temp, > + &ipmi->tx_msg_list, head) { > + /* wake up the sleep thread on the Tx msg */ > + complete(&tx_msg->tx_complete); > + } > + spin_unlock_irqrestore(&ipmi->tx_msg_lock, flags); > + schedule_timeout_uninterruptible(msecs_to_jiffies(1)); > } > - if (!list_empty(&ipmi->tx_msg_list)) > - dev_warn(&pnp_dev->dev, "tx msg list is not NULL\n"); > } > > static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data) > @@ -304,22 +379,26 @@ static void ipmi_register_bmc(int iface, struct device *dev) > { > struct acpi_ipmi_device *ipmi_device, *temp; > struct pnp_dev *pnp_dev; > - ipmi_user_t user; > int err; > struct ipmi_smi_info smi_data; > acpi_handle handle; > > err = ipmi_get_smi_info(iface, &smi_data); > - > if (err) > return; > > - if (smi_data.addr_src != SI_ACPI) { > - put_device(smi_data.dev); > - return; > - } > - > + if (smi_data.addr_src != SI_ACPI) > + goto err_ref; > handle = smi_data.addr_info.acpi_info.acpi_handle; > + if (!handle) > + goto err_ref; > + pnp_dev = to_pnp_dev(smi_data.dev); > + > + ipmi_device = ipmi_dev_alloc(iface, &smi_data, handle); > + if (!ipmi_device) { > + dev_warn(&pnp_dev->dev, "Can't create IPMI user interface\n"); > + goto err_ref; > + } > > mutex_lock(&driver_data.ipmi_lock); > list_for_each_entry(temp, &driver_data.ipmi_devices, head) { > @@ -328,54 +407,42 @@ static void ipmi_register_bmc(int iface, struct device *dev) > * to the device list, don't add it again. > */ > if (temp->handle == handle) > - goto out; > + goto err_lock; > } > > - ipmi_device = kzalloc(sizeof(*ipmi_device), GFP_KERNEL); > - > - if (!ipmi_device) > - goto out; > - > - pnp_dev = to_pnp_dev(smi_data.dev); > - ipmi_device->handle = handle; > - ipmi_device->pnp_dev = pnp_dev; > - > - err = ipmi_create_user(iface, &driver_data.ipmi_hndlrs, > - ipmi_device, &user); > - if (err) { > - dev_warn(&pnp_dev->dev, "Can't create IPMI user interface\n"); > - kfree(ipmi_device); > - goto out; > - } > - acpi_add_ipmi_device(ipmi_device); > - ipmi_device->user_interface = user; > - ipmi_device->ipmi_ifnum = iface; > + list_add_tail(&ipmi_device->head, &driver_data.ipmi_devices); > mutex_unlock(&driver_data.ipmi_lock); > - memcpy(&ipmi_device->smi_data, &smi_data, sizeof(struct ipmi_smi_info)); > + put_device(smi_data.dev); > return; > > -out: > +err_lock: > mutex_unlock(&driver_data.ipmi_lock); > + ipmi_dev_release(ipmi_device); > +err_ref: > put_device(smi_data.dev); > return; > } > > static void ipmi_bmc_gone(int iface) > { > - struct acpi_ipmi_device *ipmi_device, *temp; > + int dev_found = 0; > + struct acpi_ipmi_device *ipmi_device; > > mutex_lock(&driver_data.ipmi_lock); > - list_for_each_entry_safe(ipmi_device, temp, > - &driver_data.ipmi_devices, head) { > - if (ipmi_device->ipmi_ifnum != iface) > - continue; > - > - acpi_remove_ipmi_device(ipmi_device); > - put_device(ipmi_device->smi_data.dev); > - kfree(ipmi_device); > - break; > + list_for_each_entry(ipmi_device, &driver_data.ipmi_devices, head) { > + if (ipmi_device->ipmi_ifnum == iface) { > + dev_found = 1; You can do the list_del() here, because you're under the mutex, so others won't see the list in an inconsistens state and you're about to break anyway. > + break; > + } > } > + if (dev_found) > + list_del(&ipmi_device->head); > mutex_unlock(&driver_data.ipmi_lock); > + > + if (dev_found) { > + ipmi_flush_tx_msg(ipmi_device); > + acpi_ipmi_dev_put(ipmi_device); > + } > } > > /* -------------------------------------------------------------------------- > @@ -400,7 +467,8 @@ acpi_ipmi_space_handler(u32 function, acpi_physical_address address, > void *handler_context, void *region_context) > { > struct acpi_ipmi_msg *tx_msg; > - struct acpi_ipmi_device *ipmi_device = handler_context; > + int iface = (long)handler_context; > + struct acpi_ipmi_device *ipmi_device; > int err, rem_time; > acpi_status status; > unsigned long flags; > @@ -414,12 +482,15 @@ acpi_ipmi_space_handler(u32 function, acpi_physical_address address, > if ((function & ACPI_IO_MASK) == ACPI_READ) > return AE_TYPE; > > - if (!ipmi_device->user_interface) > + ipmi_device = acpi_ipmi_get_targeted_smi(iface); > + if (!ipmi_device) > return AE_NOT_EXIST; > > tx_msg = acpi_alloc_ipmi_msg(ipmi_device); > - if (!tx_msg) > - return AE_NO_MEMORY; > + if (!tx_msg) { > + status = AE_NO_MEMORY; > + goto out_ref; > + } > > if (acpi_format_ipmi_request(tx_msg, address, value) != 0) { > status = AE_TYPE; > @@ -449,6 +520,8 @@ out_list: > spin_unlock_irqrestore(&ipmi_device->tx_msg_lock, flags); > out_msg: > kfree(tx_msg); > +out_ref: > + acpi_ipmi_dev_put(ipmi_device); > return status; > } > > @@ -473,7 +546,7 @@ static int ipmi_install_space_handler(struct acpi_ipmi_device *ipmi) > status = acpi_install_address_space_handler(ipmi->handle, > ACPI_ADR_SPACE_IPMI, > &acpi_ipmi_space_handler, > - NULL, ipmi); > + NULL, (void *)((long)ipmi->ipmi_ifnum)); > if (ACPI_FAILURE(status)) { > struct pnp_dev *pnp_dev = ipmi->pnp_dev; > dev_warn(&pnp_dev->dev, "Can't register IPMI opregion space " > @@ -484,36 +557,6 @@ static int ipmi_install_space_handler(struct acpi_ipmi_device *ipmi) > return 0; > } > > -static void acpi_add_ipmi_device(struct acpi_ipmi_device *ipmi_device) > -{ > - > - INIT_LIST_HEAD(&ipmi_device->head); > - > - spin_lock_init(&ipmi_device->tx_msg_lock); > - INIT_LIST_HEAD(&ipmi_device->tx_msg_list); > - ipmi_install_space_handler(ipmi_device); > - > - list_add_tail(&ipmi_device->head, &driver_data.ipmi_devices); > -} > - > -static void acpi_remove_ipmi_device(struct acpi_ipmi_device *ipmi_device) > -{ > - /* > - * If the IPMI user interface is created, it should be > - * destroyed. > - */ > - if (ipmi_device->user_interface) { > - ipmi_destroy_user(ipmi_device->user_interface); > - ipmi_device->user_interface = NULL; > - } > - /* flush the Tx_msg list */ > - if (!list_empty(&ipmi_device->tx_msg_list)) > - ipmi_flush_tx_msg(ipmi_device); > - > - list_del(&ipmi_device->head); > - ipmi_remove_space_handler(ipmi_device); > -} > - > static int __init acpi_ipmi_init(void) > { > int result = 0; > @@ -530,7 +573,7 @@ static int __init acpi_ipmi_init(void) > > static void __exit acpi_ipmi_exit(void) > { > - struct acpi_ipmi_device *ipmi_device, *temp; > + struct acpi_ipmi_device *ipmi_device; > > if (acpi_disabled) > return; > @@ -544,11 +587,17 @@ static void __exit acpi_ipmi_exit(void) > * handler and free it. > */ > mutex_lock(&driver_data.ipmi_lock); > - list_for_each_entry_safe(ipmi_device, temp, > - &driver_data.ipmi_devices, head) { > - acpi_remove_ipmi_device(ipmi_device); > - put_device(ipmi_device->smi_data.dev); > - kfree(ipmi_device); > + while (!list_empty(&driver_data.ipmi_devices)) { > + ipmi_device = list_first_entry(&driver_data.ipmi_devices, > + struct acpi_ipmi_device, > + head); > + list_del(&ipmi_device->head); > + mutex_unlock(&driver_data.ipmi_lock); > + > + ipmi_flush_tx_msg(ipmi_device); > + acpi_ipmi_dev_put(ipmi_device); > + > + mutex_lock(&driver_data.ipmi_lock); > } > mutex_unlock(&driver_data.ipmi_lock); > } > -- I speak only for myself. Rafael J. Wysocki, Intel Open Source Technology Center. -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html