This change will send an OFFLINE event to udev with the ERROR=DEAD environment variable set when the HC dies. By notifying user space the appropriate policies can be applied. i.e., * Collect error logs. * Notify the user that USB is no longer functional. * Perform a graceful reboot. Reported-by: kbuild test robot <lkp@xxxxxxxxx> Signed-off-by: Raul E Rangel <rrangel@xxxxxxxxxxxx> --- I wasn't able to find any good examples of other drivers sending a dead notification. Use an EVENT= format https://github.com/torvalds/linux/blob/master/drivers/acpi/dock.c#L302 https://github.com/torvalds/linux/blob/master/drivers/net/wireless/ath/wil6210/interrupt.c#L497 Uses SDEV_MEDIA_CHANGE= https://github.com/torvalds/linux/blob/master/drivers/scsi/scsi_lib.c#L2318 Uses ERROR=1. https://chromium.googlesource.com/chromiumos/third_party/kernel/+/7f6d8aec5803aac44192f03dce5637b66cda7abf/drivers/input/touchscreen/atmel_mxt_ts.c#1581 I'm not a fan because it doesn't signal what the error was. Changes in v4: - Move hcd_died_work out of CONFIG_PM ifdef - Make env static, but not const Changes in v3: - Added documentation - Removed use of lock and null check - Changed event to OFFLINE + ERROR=DEAD Changes in v2: - Check that the root hub still exists before sending the uevent. - Ensure died_work has completed before deallocating. Documentation/ABI/testing/usb-uevent | 27 +++++++++++++++++++++++++++ drivers/usb/core/hcd.c | 24 ++++++++++++++++++++++++ include/linux/usb/hcd.h | 1 + 3 files changed, 52 insertions(+) create mode 100644 Documentation/ABI/testing/usb-uevent diff --git a/Documentation/ABI/testing/usb-uevent b/Documentation/ABI/testing/usb-uevent new file mode 100644 index 000000000000..d35c3cad892c --- /dev/null +++ b/Documentation/ABI/testing/usb-uevent @@ -0,0 +1,27 @@ +What: Raise a uevent when a USB Host Controller has died +Date: 2019-04-17 +KernelVersion: 5.2 +Contact: linux-usb@xxxxxxxxxxxxxxx +Description: When the USB Host Controller has entered a state where it is no + longer functional a uevent will be raised. The uevent will + contain ACTION=offline and ERROR=DEAD. + + Here is an example taken using udevadm monitor -p: + + KERNEL[130.428945] offline /devices/pci0000:00/0000:00:10.0/usb2 (usb) + ACTION=offline + BUSNUM=002 + DEVNAME=/dev/bus/usb/002/001 + DEVNUM=001 + DEVPATH=/devices/pci0000:00/0000:00:10.0/usb2 + DEVTYPE=usb_device + DRIVER=usb + ERROR=DEAD + MAJOR=189 + MINOR=128 + PRODUCT=1d6b/2/414 + SEQNUM=2168 + SUBSYSTEM=usb + TYPE=9/0/1 + +Users: chromium-os-dev@xxxxxxxxxxxx diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 975d7c1288e3..0c0fd574bab5 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2448,6 +2448,19 @@ EXPORT_SYMBOL_GPL(usb_hcd_irq); /*-------------------------------------------------------------------------*/ +/* Workqueue routine for when the root-hub has died. */ +static void hcd_died_work(struct work_struct *work) +{ + struct usb_hcd *hcd = container_of(work, struct usb_hcd, died_work); + static char *env[] = { + "ERROR=DEAD", + NULL + }; + + /* Notify user space that the host controller has died */ + kobject_uevent_env(&hcd->self.root_hub->dev.kobj, KOBJ_OFFLINE, env); +} + /** * usb_hc_died - report abnormal shutdown of a host controller (bus glue) * @hcd: pointer to the HCD representing the controller @@ -2488,6 +2501,13 @@ void usb_hc_died (struct usb_hcd *hcd) usb_kick_hub_wq(hcd->self.root_hub); } } + + /* Handle the case where this function gets called with a shared HCD */ + if (usb_hcd_is_primary_hcd(hcd)) + schedule_work(&hcd->died_work); + else + schedule_work(&hcd->primary_hcd->died_work); + spin_unlock_irqrestore (&hcd_root_hub_lock, flags); /* Make sure that the other roothub is also deallocated. */ } @@ -2555,6 +2575,8 @@ struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver, INIT_WORK(&hcd->wakeup_work, hcd_resume_work); #endif + INIT_WORK(&hcd->died_work, hcd_died_work); + hcd->driver = driver; hcd->speed = driver->flags & HCD_MASK; hcd->product_desc = (driver->product_desc) ? driver->product_desc : @@ -2908,6 +2930,7 @@ int usb_add_hcd(struct usb_hcd *hcd, #ifdef CONFIG_PM cancel_work_sync(&hcd->wakeup_work); #endif + cancel_work_sync(&hcd->died_work); mutex_lock(&usb_bus_idr_lock); usb_disconnect(&rhdev); /* Sets rhdev to NULL */ mutex_unlock(&usb_bus_idr_lock); @@ -2968,6 +2991,7 @@ void usb_remove_hcd(struct usb_hcd *hcd) #ifdef CONFIG_PM cancel_work_sync(&hcd->wakeup_work); #endif + cancel_work_sync(&hcd->died_work); mutex_lock(&usb_bus_idr_lock); usb_disconnect(&rhdev); /* Sets rhdev to NULL */ diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 695931b03684..66a24b13e2ab 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -98,6 +98,7 @@ struct usb_hcd { #ifdef CONFIG_PM struct work_struct wakeup_work; /* for remote wakeup */ #endif + struct work_struct died_work; /* for when the device dies */ /* * hardware info/state -- 2.21.0.392.gf8f6787159e-goog