Re: [PATCH 04/13] ACPI/IPMI: Fix race caused by the unprotected ACPI IPMI user

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tuesday, July 23, 2013 04:09:26 PM Lv Zheng wrote:
> This patch uses reference counting to fix the race caused by the
> unprotected ACPI IPMI user.
> 
> As the acpi_ipmi_device->user_interface check in acpi_ipmi_space_handler()
> can happen before setting user_interface to NULL and codes after the check
> in acpi_ipmi_space_handler() can happen after user_interface becoming NULL,
> then the on-going acpi_ipmi_space_handler() still can pass an invalid
> acpi_ipmi_device->user_interface to ipmi_request_settime().  Such race
> condition is not allowed by the IPMI layer's API design as crash will
> happen in ipmi_request_settime().
> In IPMI layer, smi_gone()/new_smi() callbacks are protected by
> smi_watchers_mutex, thus their invocations are serialized.  But as a new
> smi can re-use the freed intf_num, it requires that the callback
> implementation must not use intf_num as an identification mean or it must
> ensure all references to the previous smi are all dropped before exiting
> smi_gone() callback.  In case of acpi_ipmi module, this means
> ipmi_flush_tx_msg() must ensure all on-going IPMI transfers are completed
> before exiting ipmi_flush_tx_msg().
> 
> This patch follows ipmi_devintf.c design:
> 1. Invoking ipmi_destroy_user() after the reference count of
>    acpi_ipmi_device dropping to 0, this matches IPMI layer's API calling
>    rule on ipmi_destroy_user() and ipmi_request_settime().
> 2. References of acpi_ipmi_device dropping to 1 means tx_msg related to
>    this acpi_ipmi_device are all freed, this can be used to implement the
>    new flushing mechanism.  Note complete() must be retried so that the
>    on-going tx_msg won't block flushing at the point to add tx_msg into
>    tx_msg_list where reference of acpi_ipmi_device is held.  This matches
>    the IPMI layer's callback rule on smi_gone()/new_smi() serialization.
> 3. ipmi_flush_tx_msg() is performed after deleting acpi_ipmi_device from
>    the list so that no new tx_msg can be created after entering flushing
>    process.
> 4. The flushing of tx_msg is also moved out of ipmi_lock in this patch.
> 
> The forthcoming IPMI operation region handler installation changes also
> requires acpi_ipmi_device be handled in the reference counting style.
> 
> Authorship is also updated due to this design change.
> 
> Signed-off-by: Lv Zheng <lv.zheng@xxxxxxxxx>
> Cc: Zhao Yakui <yakui.zhao@xxxxxxxxx>
> Reviewed-by: Huang Ying <ying.huang@xxxxxxxxx>
> ---
>  drivers/acpi/acpi_ipmi.c |  249 +++++++++++++++++++++++++++-------------------
>  1 file changed, 149 insertions(+), 100 deletions(-)
> 
> diff --git a/drivers/acpi/acpi_ipmi.c b/drivers/acpi/acpi_ipmi.c
> index 527ee43..cbf25e0 100644
> --- a/drivers/acpi/acpi_ipmi.c
> +++ b/drivers/acpi/acpi_ipmi.c
> @@ -1,8 +1,9 @@
>  /*
>   *  acpi_ipmi.c - ACPI IPMI opregion
>   *
> - *  Copyright (C) 2010 Intel Corporation
> - *  Copyright (C) 2010 Zhao Yakui <yakui.zhao@xxxxxxxxx>
> + *  Copyright (C) 2010, 2013 Intel Corporation
> + *    Author: Zhao Yakui <yakui.zhao@xxxxxxxxx>
> + *            Lv Zheng <lv.zheng@xxxxxxxxx>
>   *
>   * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>   *
> @@ -67,6 +68,7 @@ struct acpi_ipmi_device {
>  	long curr_msgid;
>  	unsigned long flags;
>  	struct ipmi_smi_info smi_data;
> +	atomic_t refcnt;

Can you use a kref instead?

>  };
>  
>  struct ipmi_driver_data {
> @@ -107,8 +109,8 @@ struct acpi_ipmi_buffer {
>  static void ipmi_register_bmc(int iface, struct device *dev);
>  static void ipmi_bmc_gone(int iface);
>  static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data);
> -static void acpi_add_ipmi_device(struct acpi_ipmi_device *ipmi_device);
> -static void acpi_remove_ipmi_device(struct acpi_ipmi_device *ipmi_device);
> +static int ipmi_install_space_handler(struct acpi_ipmi_device *ipmi);
> +static void ipmi_remove_space_handler(struct acpi_ipmi_device *ipmi);
>  
>  static struct ipmi_driver_data driver_data = {
>  	.ipmi_devices = LIST_HEAD_INIT(driver_data.ipmi_devices),
> @@ -122,6 +124,80 @@ static struct ipmi_driver_data driver_data = {
>  	},
>  };
>  
> +static struct acpi_ipmi_device *
> +ipmi_dev_alloc(int iface, struct ipmi_smi_info *smi_data, acpi_handle handle)
> +{
> +	struct acpi_ipmi_device *ipmi_device;
> +	int err;
> +	ipmi_user_t user;
> +
> +	ipmi_device = kzalloc(sizeof(*ipmi_device), GFP_KERNEL);
> +	if (!ipmi_device)
> +		return NULL;
> +
> +	atomic_set(&ipmi_device->refcnt, 1);
> +	INIT_LIST_HEAD(&ipmi_device->head);
> +	INIT_LIST_HEAD(&ipmi_device->tx_msg_list);
> +	spin_lock_init(&ipmi_device->tx_msg_lock);
> +
> +	ipmi_device->handle = handle;
> +	ipmi_device->pnp_dev = to_pnp_dev(get_device(smi_data->dev));
> +	memcpy(&ipmi_device->smi_data, smi_data, sizeof(struct ipmi_smi_info));
> +	ipmi_device->ipmi_ifnum = iface;
> +
> +	err = ipmi_create_user(iface, &driver_data.ipmi_hndlrs,
> +			       ipmi_device, &user);
> +	if (err) {
> +		put_device(smi_data->dev);
> +		kfree(ipmi_device);
> +		return NULL;
> +	}
> +	ipmi_device->user_interface = user;
> +	ipmi_install_space_handler(ipmi_device);
> +
> +	return ipmi_device;
> +}
> +
> +static struct acpi_ipmi_device *
> +acpi_ipmi_dev_get(struct acpi_ipmi_device *ipmi_device)
> +{
> +	if (ipmi_device)
> +		atomic_inc(&ipmi_device->refcnt);
> +	return ipmi_device;
> +}
> +
> +static void ipmi_dev_release(struct acpi_ipmi_device *ipmi_device)
> +{
> +	ipmi_remove_space_handler(ipmi_device);
> +	ipmi_destroy_user(ipmi_device->user_interface);
> +	put_device(ipmi_device->smi_data.dev);
> +	kfree(ipmi_device);
> +}
> +
> +static void acpi_ipmi_dev_put(struct acpi_ipmi_device *ipmi_device)
> +{
> +	if (ipmi_device && atomic_dec_and_test(&ipmi_device->refcnt))
> +		ipmi_dev_release(ipmi_device);
> +}
> +
> +static struct acpi_ipmi_device *acpi_ipmi_get_targeted_smi(int iface)
> +{
> +	int dev_found = 0;
> +	struct acpi_ipmi_device *ipmi_device;
> +

Why don't you do

	struct acpi_ipmi_device *ipmi_device, *ret = NULL;

and then ->

> +	mutex_lock(&driver_data.ipmi_lock);
> +	list_for_each_entry(ipmi_device, &driver_data.ipmi_devices, head) {
> +		if (ipmi_device->ipmi_ifnum == iface) {

->			ret = ipmi_device; ->

> +			dev_found = 1;
> +			acpi_ipmi_dev_get(ipmi_device);
> +			break;
> +		}
> +	}
> +	mutex_unlock(&driver_data.ipmi_lock);
> +
> +	return dev_found ? ipmi_device : NULL;

->	return ret;

> +}
> +
>  static struct acpi_ipmi_msg *acpi_alloc_ipmi_msg(struct acpi_ipmi_device *ipmi)
>  {
>  	struct acpi_ipmi_msg *ipmi_msg;
> @@ -228,25 +304,24 @@ static void acpi_format_ipmi_response(struct acpi_ipmi_msg *msg,
>  static void ipmi_flush_tx_msg(struct acpi_ipmi_device *ipmi)
>  {
>  	struct acpi_ipmi_msg *tx_msg, *temp;
> -	int count = HZ / 10;
> -	struct pnp_dev *pnp_dev = ipmi->pnp_dev;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&ipmi->tx_msg_lock, flags);
> -	list_for_each_entry_safe(tx_msg, temp, &ipmi->tx_msg_list, head) {
> -		/* wake up the sleep thread on the Tx msg */
> -		complete(&tx_msg->tx_complete);
> -	}
> -	spin_unlock_irqrestore(&ipmi->tx_msg_lock, flags);
> -
> -	/* wait for about 100ms to flush the tx message list */
> -	while (count--) {
> -		if (list_empty(&ipmi->tx_msg_list))
> -			break;
> -		schedule_timeout(1);
> +	/*
> +	 * NOTE: Synchronous Flushing
> +	 * Wait until refnct dropping to 1 - no other users unless this
> +	 * context.  This function should always be called before
> +	 * acpi_ipmi_device destruction.
> +	 */
> +	while (atomic_read(&ipmi->refcnt) > 1) {

Isn't this racy?  What if we see that the refcount is 1 and break the loop,
but someone else bumps up the refcount at the same time?

> +		spin_lock_irqsave(&ipmi->tx_msg_lock, flags);
> +		list_for_each_entry_safe(tx_msg, temp,
> +					 &ipmi->tx_msg_list, head) {
> +			/* wake up the sleep thread on the Tx msg */
> +			complete(&tx_msg->tx_complete);
> +		}
> +		spin_unlock_irqrestore(&ipmi->tx_msg_lock, flags);
> +		schedule_timeout_uninterruptible(msecs_to_jiffies(1));
>  	}
> -	if (!list_empty(&ipmi->tx_msg_list))
> -		dev_warn(&pnp_dev->dev, "tx msg list is not NULL\n");
>  }
>  
>  static void ipmi_msg_handler(struct ipmi_recv_msg *msg, void *user_msg_data)
> @@ -304,22 +379,26 @@ static void ipmi_register_bmc(int iface, struct device *dev)
>  {
>  	struct acpi_ipmi_device *ipmi_device, *temp;
>  	struct pnp_dev *pnp_dev;
> -	ipmi_user_t		user;
>  	int err;
>  	struct ipmi_smi_info smi_data;
>  	acpi_handle handle;
>  
>  	err = ipmi_get_smi_info(iface, &smi_data);
> -
>  	if (err)
>  		return;
>  
> -	if (smi_data.addr_src != SI_ACPI) {
> -		put_device(smi_data.dev);
> -		return;
> -	}
> -
> +	if (smi_data.addr_src != SI_ACPI)
> +		goto err_ref;
>  	handle = smi_data.addr_info.acpi_info.acpi_handle;
> +	if (!handle)
> +		goto err_ref;
> +	pnp_dev = to_pnp_dev(smi_data.dev);
> +
> +	ipmi_device = ipmi_dev_alloc(iface, &smi_data, handle);
> +	if (!ipmi_device) {
> +		dev_warn(&pnp_dev->dev, "Can't create IPMI user interface\n");
> +		goto err_ref;
> +	}
>  
>  	mutex_lock(&driver_data.ipmi_lock);
>  	list_for_each_entry(temp, &driver_data.ipmi_devices, head) {
> @@ -328,54 +407,42 @@ static void ipmi_register_bmc(int iface, struct device *dev)
>  		 * to the device list, don't add it again.
>  		 */
>  		if (temp->handle == handle)
> -			goto out;
> +			goto err_lock;
>  	}
>  
> -	ipmi_device = kzalloc(sizeof(*ipmi_device), GFP_KERNEL);
> -
> -	if (!ipmi_device)
> -		goto out;
> -
> -	pnp_dev = to_pnp_dev(smi_data.dev);
> -	ipmi_device->handle = handle;
> -	ipmi_device->pnp_dev = pnp_dev;
> -
> -	err = ipmi_create_user(iface, &driver_data.ipmi_hndlrs,
> -					ipmi_device, &user);
> -	if (err) {
> -		dev_warn(&pnp_dev->dev, "Can't create IPMI user interface\n");
> -		kfree(ipmi_device);
> -		goto out;
> -	}
> -	acpi_add_ipmi_device(ipmi_device);
> -	ipmi_device->user_interface = user;
> -	ipmi_device->ipmi_ifnum = iface;
> +	list_add_tail(&ipmi_device->head, &driver_data.ipmi_devices);
>  	mutex_unlock(&driver_data.ipmi_lock);
> -	memcpy(&ipmi_device->smi_data, &smi_data, sizeof(struct ipmi_smi_info));
> +	put_device(smi_data.dev);
>  	return;
>  
> -out:
> +err_lock:
>  	mutex_unlock(&driver_data.ipmi_lock);
> +	ipmi_dev_release(ipmi_device);
> +err_ref:
>  	put_device(smi_data.dev);
>  	return;
>  }
>  
>  static void ipmi_bmc_gone(int iface)
>  {
> -	struct acpi_ipmi_device *ipmi_device, *temp;
> +	int dev_found = 0;
> +	struct acpi_ipmi_device *ipmi_device;
>  
>  	mutex_lock(&driver_data.ipmi_lock);
> -	list_for_each_entry_safe(ipmi_device, temp,
> -				&driver_data.ipmi_devices, head) {
> -		if (ipmi_device->ipmi_ifnum != iface)
> -			continue;
> -
> -		acpi_remove_ipmi_device(ipmi_device);
> -		put_device(ipmi_device->smi_data.dev);
> -		kfree(ipmi_device);
> -		break;
> +	list_for_each_entry(ipmi_device, &driver_data.ipmi_devices, head) {
> +		if (ipmi_device->ipmi_ifnum == iface) {
> +			dev_found = 1;

You can do the list_del() here, because you're under the mutex, so others
won't see the list in an inconsistens state and you're about to break anyway.

> +			break;
> +		}
>  	}
> +	if (dev_found)
> +		list_del(&ipmi_device->head);
>  	mutex_unlock(&driver_data.ipmi_lock);
> +
> +	if (dev_found) {
> +		ipmi_flush_tx_msg(ipmi_device);
> +		acpi_ipmi_dev_put(ipmi_device);
> +	}
>  }
>  
>  /* --------------------------------------------------------------------------
> @@ -400,7 +467,8 @@ acpi_ipmi_space_handler(u32 function, acpi_physical_address address,
>  			void *handler_context, void *region_context)
>  {
>  	struct acpi_ipmi_msg *tx_msg;
> -	struct acpi_ipmi_device *ipmi_device = handler_context;
> +	int iface = (long)handler_context;
> +	struct acpi_ipmi_device *ipmi_device;
>  	int err, rem_time;
>  	acpi_status status;
>  	unsigned long flags;
> @@ -414,12 +482,15 @@ acpi_ipmi_space_handler(u32 function, acpi_physical_address address,
>  	if ((function & ACPI_IO_MASK) == ACPI_READ)
>  		return AE_TYPE;
>  
> -	if (!ipmi_device->user_interface)
> +	ipmi_device = acpi_ipmi_get_targeted_smi(iface);
> +	if (!ipmi_device)
>  		return AE_NOT_EXIST;
>  
>  	tx_msg = acpi_alloc_ipmi_msg(ipmi_device);
> -	if (!tx_msg)
> -		return AE_NO_MEMORY;
> +	if (!tx_msg) {
> +		status = AE_NO_MEMORY;
> +		goto out_ref;
> +	}
>  
>  	if (acpi_format_ipmi_request(tx_msg, address, value) != 0) {
>  		status = AE_TYPE;
> @@ -449,6 +520,8 @@ out_list:
>  	spin_unlock_irqrestore(&ipmi_device->tx_msg_lock, flags);
>  out_msg:
>  	kfree(tx_msg);
> +out_ref:
> +	acpi_ipmi_dev_put(ipmi_device);
>  	return status;
>  }
>  
> @@ -473,7 +546,7 @@ static int ipmi_install_space_handler(struct acpi_ipmi_device *ipmi)
>  	status = acpi_install_address_space_handler(ipmi->handle,
>  						    ACPI_ADR_SPACE_IPMI,
>  						    &acpi_ipmi_space_handler,
> -						    NULL, ipmi);
> +						    NULL, (void *)((long)ipmi->ipmi_ifnum));
>  	if (ACPI_FAILURE(status)) {
>  		struct pnp_dev *pnp_dev = ipmi->pnp_dev;
>  		dev_warn(&pnp_dev->dev, "Can't register IPMI opregion space "
> @@ -484,36 +557,6 @@ static int ipmi_install_space_handler(struct acpi_ipmi_device *ipmi)
>  	return 0;
>  }
>  
> -static void acpi_add_ipmi_device(struct acpi_ipmi_device *ipmi_device)
> -{
> -
> -	INIT_LIST_HEAD(&ipmi_device->head);
> -
> -	spin_lock_init(&ipmi_device->tx_msg_lock);
> -	INIT_LIST_HEAD(&ipmi_device->tx_msg_list);
> -	ipmi_install_space_handler(ipmi_device);
> -
> -	list_add_tail(&ipmi_device->head, &driver_data.ipmi_devices);
> -}
> -
> -static void acpi_remove_ipmi_device(struct acpi_ipmi_device *ipmi_device)
> -{
> -	/*
> -	 * If the IPMI user interface is created, it should be
> -	 * destroyed.
> -	 */
> -	if (ipmi_device->user_interface) {
> -		ipmi_destroy_user(ipmi_device->user_interface);
> -		ipmi_device->user_interface = NULL;
> -	}
> -	/* flush the Tx_msg list */
> -	if (!list_empty(&ipmi_device->tx_msg_list))
> -		ipmi_flush_tx_msg(ipmi_device);
> -
> -	list_del(&ipmi_device->head);
> -	ipmi_remove_space_handler(ipmi_device);
> -}
> -
>  static int __init acpi_ipmi_init(void)
>  {
>  	int result = 0;
> @@ -530,7 +573,7 @@ static int __init acpi_ipmi_init(void)
>  
>  static void __exit acpi_ipmi_exit(void)
>  {
> -	struct acpi_ipmi_device *ipmi_device, *temp;
> +	struct acpi_ipmi_device *ipmi_device;
>  
>  	if (acpi_disabled)
>  		return;
> @@ -544,11 +587,17 @@ static void __exit acpi_ipmi_exit(void)
>  	 * handler and free it.
>  	 */
>  	mutex_lock(&driver_data.ipmi_lock);
> -	list_for_each_entry_safe(ipmi_device, temp,
> -				&driver_data.ipmi_devices, head) {
> -		acpi_remove_ipmi_device(ipmi_device);
> -		put_device(ipmi_device->smi_data.dev);
> -		kfree(ipmi_device);
> +	while (!list_empty(&driver_data.ipmi_devices)) {
> +		ipmi_device = list_first_entry(&driver_data.ipmi_devices,
> +					       struct acpi_ipmi_device,
> +					       head);
> +		list_del(&ipmi_device->head);
> +		mutex_unlock(&driver_data.ipmi_lock);
> +
> +		ipmi_flush_tx_msg(ipmi_device);
> +		acpi_ipmi_dev_put(ipmi_device);
> +
> +		mutex_lock(&driver_data.ipmi_lock);
>  	}
>  	mutex_unlock(&driver_data.ipmi_lock);
>  }
> 
-- 
I speak only for myself.
Rafael J. Wysocki, Intel Open Source Technology Center.
--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]