Hi En-Wei,
On 7/22/24 10:36 AM, En-Wei Wu wrote:
> On a Dell Venue 7140 tablet with the keyboard/touchpad/battery dock, when
> disconnecting the dock there is a kernel bug:
>
> BUG: kernel NULL pointer dereference, address: 0000000000000018
>
> And this causes the following things not to work:
> 1. Suspend to idle - the system simply hangs
> 2. Poweroff normally (the only way is forcing it via a long press of the power button)
> 3. USB ports: both the USB port on the tablet and also plugging in the keyboard again
>
> The error message above (plus some crash dump) isn't so useful for debugging, but we
> have noticed that there is a debug message shown before the crash dump:
>
> intel-vbtn INT33D6:00: Registering Intel Virtual Switches input-dev after receiving
> a switch event
>
> The message above is shown right after the dock is disconnected, and the message implies:
>
> We failed to set the priv->has_switches to true in the probe function since the
> Dell Venue 11 Pro 7140 is not shown in the dmi_switches_allow_list, and this causes a problem
> that no input_register_device() on the switch device is called. Afterward, when a user
> disconnects the dock, intel-vbtn receives the ACPI event and finally finds that there is a
> switch out there. So intel-vbtn starts to register the switch device, which may be a dangerous
> behavior since there might be some device-related objects/structs that have been freed (due to
> the disconnection of the dock).
>
> To solve this problem from the root cause, simply add the Dell Venue 11 pro 7140 to the
> dmi_switches_allow_list.
> (The Dell Venue 11 Pro 7140 is a 2-in-1 model that has chassis-type "Portable".)
>
> BugLink: https://bugs.launchpad.net/bugs/2073001
>
> Fixes: 8169bd3e6e19 ("platform/x86: intel-vbtn: Switch to an allow-list
> for SW_TABLET_MODE reporting")
> Reported-by: Kostadin Stoilov <kmstoilov@xxxxxxxxx>
> Tested-by: Kostadin Stoilov <kmstoilov@xxxxxxxxx>
> Signed-off-by: En-Wei Wu <en-wei.wu@xxxxxxxxxxxxx>
Thank you for your patch. Looking at the logs from the launchpad bug I noticed that
intel-vbtn INT33D6:00: Registering Intel Virtual Switches input-dev after receiving a switch event
is reported in the logs twice, which strongly suggests that the intel-vbtn notify_handler()
function is racing with itself.
In the past ACPI notify handlers could never run more than once (at the same time)
but since commit e2ffcda16290 ("ACPI: OSL: Allow Notify () handlers to run on
all CPUs") ACPI notify handlers like the intel-vbtn notify_handler() may
run on multiple CPU cores racing with themselves.
I believe that this is the real problem here. I have attached a patch which should
fix this. Can you build a test-kernel with this patch instead of your patch and
ask the reporter of https://bugs.launchpad.net/bugs/2073001
to test a kernel with the attached patch (and without your patch) to confirm
that this fixes it in a more generic manner?
Regards,
Hans
> ---
> drivers/platform/x86/intel/vbtn.c | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c
> index 9b7ce03ba085..46d07d3cd34b 100644
> --- a/drivers/platform/x86/intel/vbtn.c
> +++ b/drivers/platform/x86/intel/vbtn.c
> @@ -235,6 +235,12 @@ static const struct dmi_system_id dmi_switches_allow_list[] = {
> DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7130"),
> },
> },
> + {
> + .matches = {
> + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
> + DMI_MATCH(DMI_PRODUCT_NAME, "Venue 11 Pro 7140"),
> + },
> + },
> {
> .matches = {
> DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
From 29af35bfbf6caa0d27530150dd0adbf88e3c6eab Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@xxxxxxxxxx>
Date: Mon, 22 Jul 2024 11:06:36 +0200
Subject: [PATCH] platform/x86: intel-vbtn: Protect ACPI notify handler against
recursion
Since commit e2ffcda16290 ("ACPI: OSL: Allow Notify () handlers to run on
all CPUs") ACPI notify handlers like the intel-vbtn notify_handler() may
run on multiple CPU cores racing with themselves.
This race gets hit on Dell Venue 7140 tablets when undocking from
the keyboard, causing the handler to try and register priv->switches_dev
twice, as can be seen from the dev_info() message getting logged twice:
[ 83.861800] intel-vbtn INT33D6:00: Registering Intel Virtual Switches input-dev after receiving a switch event
[ 83.861858] input: Intel Virtual Switches as /devices/pci0000:00/0000:00:1f.0/PNP0C09:00/INT33D6:00/input/input17
[ 83.861865] intel-vbtn INT33D6:00: Registering Intel Virtual Switches input-dev after receiving a switch event
After which things go seriously wrong:
[ 83.861872] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:1f.0/PNP0C09:00/INT33D6:00/input/input17'
...
[ 83.861967] kobject: kobject_add_internal failed for input17 with -EEXIST, don't try to register things with the same name in the same directory.
[ 83.877338] BUG: kernel NULL pointer dereference, address: 0000000000000018
...
Protect intel-vbtn notify_handler() from racing with itself with a mutex
to fix this.
Fixes: e2ffcda16290 ("ACPI: OSL: Allow Notify () handlers to run on all CPUs")
Reported-by: En-Wei Wu <en-wei.wu@xxxxxxxxxxxxx>
Closes: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/2073001
Signed-off-by: Hans de Goede <hdegoede@xxxxxxxxxx>
---
drivers/platform/x86/intel/vbtn.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c
index 9b7ce03ba085..93deda7daac4 100644
--- a/drivers/platform/x86/intel/vbtn.c
+++ b/drivers/platform/x86/intel/vbtn.c
@@ -12,6 +12,7 @@
#include <linux/input/sparse-keymap.h>
#include <linux/kernel.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/platform_device.h>
#include <linux/suspend.h>
#include "../dual_accel_detect.h"
@@ -66,6 +67,7 @@ static const struct key_entry intel_vbtn_switchmap[] = {
};
struct intel_vbtn_priv {
+ struct mutex mutex; /* Avoid notify_handler() racing with itself */
struct input_dev *buttons_dev;
struct input_dev *switches_dev;
bool dual_accel;
@@ -155,30 +157,32 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
bool autorelease;
int ret;
+ mutex_lock(&priv->mutex);
+
if ((ke = sparse_keymap_entry_from_scancode(priv->buttons_dev, event))) {
if (!priv->has_buttons) {
dev_warn(&device->dev, "Warning: received 0x%02x button event on a device without buttons, please report this.\n",
event);
- return;
+ goto out_unlock;
}
input_dev = priv->buttons_dev;
} else if ((ke = sparse_keymap_entry_from_scancode(priv->switches_dev, event))) {
if (!priv->has_switches) {
/* See dual_accel_detect.h for more info */
if (priv->dual_accel)
- return;
+ goto out_unlock;
dev_info(&device->dev, "Registering Intel Virtual Switches input-dev after receiving a switch event\n");
ret = input_register_device(priv->switches_dev);
if (ret)
- return;
+ goto out_unlock;
priv->has_switches = true;
}
input_dev = priv->switches_dev;
} else {
dev_dbg(&device->dev, "unknown event index 0x%x\n", event);
- return;
+ goto out_unlock;
}
if (priv->wakeup_mode) {
@@ -189,7 +193,7 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
* mirroring how the drivers/acpi/button.c code skips this too.
*/
if (ke->type == KE_KEY)
- return;
+ goto out_unlock;
}
/*
@@ -200,6 +204,9 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
autorelease = val && (!ke_rel || ke_rel->type == KE_IGNORE);
sparse_keymap_report_event(input_dev, event, val, autorelease);
+
+out_unlock:
+ mutex_unlock(&priv->mutex);
}
/*
@@ -290,6 +297,10 @@ static int intel_vbtn_probe(struct platform_device *device)
return -ENOMEM;
dev_set_drvdata(&device->dev, priv);
+ err = devm_mutex_init(&device->dev, &priv->mutex);
+ if (err)
+ return err;
+
priv->dual_accel = dual_accel;
priv->has_buttons = has_buttons;
priv->has_switches = has_switches;
--
2.45.2