Hi All, On 20.06.2022 00:03, Christian Marangi wrote: > On a devfreq PROBE_DEFER, the freq_table in the driver profile struct > is never reset and may be left in an undefined state. > > This comes from the fact that we store the freq_table in the driver > profile struct that is commonly defined as static and not reset on > PROBE_DEFER. > We currently skip the reinit of the freq_table if we find > it's already defined, since a driver may declare its own freq_table. > > This logic is flawed in the case where the devfreq core generates a freq_table, sets > it in the profile struct and then hits PROBE_DEFER, freeing the freq_table. > In this case devfreq will find a non-NULL freq_table that has been > freed, skip the freq_table generation and probe the driver based on the > wrong table. > > To fix this and correctly handle PROBE_DEFER, use a local freq_table and > max_state in the devfreq struct and never modify the freq_table present > in the profile struct if the driver provides one. > > Fixes: 0ec09ac2cebe ("PM / devfreq: Set the freq_table of devfreq device") > Cc: stable@xxxxxxxxxxxxxxx > Signed-off-by: Christian Marangi <ansuelsmth@xxxxxxxxx> > --- This patch landed in linux next-20220630 as commit b5d281f6c16d ("PM / devfreq: Rework freq_table to be local to devfreq struct"). Unfortunately it causes the following regression on my Exynos-based test systems: 8<--- cut here --- Unable to handle kernel NULL pointer dereference at virtual address 00000000 [00000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM Modules linked in: CPU: 3 PID: 49 Comm: kworker/u8:3 Not tainted 5.19.0-rc4-next-20220630 #5312 Hardware name: Samsung Exynos (Flattened Device Tree) Workqueue: events_unbound deferred_probe_work_func PC is at exynos_bus_probe+0x604/0x684 LR is at device_add+0x14c/0x908 pc : [<c090aef4>] lr : [<c06cf77c>] psr: 80000053 ... Process kworker/u8:3 (pid: 49, stack limit = 0x(ptrval)) Stack: (0xf0a15d30 to 0xf0a16000) ... 
exynos_bus_probe from platform_probe+0x5c/0xb8 platform_probe from really_probe+0xe0/0x414 really_probe from __driver_probe_device+0xa0/0x208 __driver_probe_device from driver_probe_device+0x30/0xc0 driver_probe_device from __device_attach_driver+0xa4/0x11c __device_attach_driver from bus_for_each_drv+0x7c/0xc0 bus_for_each_drv from __device_attach+0xac/0x20c __device_attach from bus_probe_device+0x88/0x90 bus_probe_device from deferred_probe_work_func+0x98/0xe0 deferred_probe_work_func from process_one_work+0x288/0x774 process_one_work from worker_thread+0x44/0x504 worker_thread from kthread+0xf4/0x128 kthread from ret_from_fork+0x14/0x2c Exception stack(0xf0a15fb0 to 0xf0a15ff8) ... ---[ end trace 0000000000000000 ]--- This issue is caused by bus->devfreq->profile->freq_table being NULL here: https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git/tree/drivers/devfreq/exynos-bus.c?h=next-20220630#n451 > drivers/devfreq/devfreq.c | 71 ++++++++++++++---------------- > drivers/devfreq/governor_passive.c | 14 +++--- > include/linux/devfreq.h | 5 +++ > 3 files changed, 46 insertions(+), 44 deletions(-) > > diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c > index 01474daf4548..2e2b3b414d67 100644 > --- a/drivers/devfreq/devfreq.c > +++ b/drivers/devfreq/devfreq.c > @@ -123,7 +123,7 @@ void devfreq_get_freq_range(struct devfreq *devfreq, > unsigned long *min_freq, > unsigned long *max_freq) > { > - unsigned long *freq_table = devfreq->profile->freq_table; > + unsigned long *freq_table = devfreq->freq_table; > s32 qos_min_freq, qos_max_freq; > > lockdep_assert_held(&devfreq->lock); > @@ -133,11 +133,11 @@ void devfreq_get_freq_range(struct devfreq *devfreq, > * The devfreq drivers can initialize this in either ascending or > * descending order and devfreq core supports both. 
> */ > - if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) { > + if (freq_table[0] < freq_table[devfreq->max_state - 1]) { > *min_freq = freq_table[0]; > - *max_freq = freq_table[devfreq->profile->max_state - 1]; > + *max_freq = freq_table[devfreq->max_state - 1]; > } else { > - *min_freq = freq_table[devfreq->profile->max_state - 1]; > + *min_freq = freq_table[devfreq->max_state - 1]; > *max_freq = freq_table[0]; > } > > @@ -169,8 +169,8 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) > { > int lev; > > - for (lev = 0; lev < devfreq->profile->max_state; lev++) > - if (freq == devfreq->profile->freq_table[lev]) > + for (lev = 0; lev < devfreq->max_state; lev++) > + if (freq == devfreq->freq_table[lev]) > return lev; > > return -EINVAL; > @@ -178,7 +178,6 @@ static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) > > static int set_freq_table(struct devfreq *devfreq) > { > - struct devfreq_dev_profile *profile = devfreq->profile; > struct dev_pm_opp *opp; > unsigned long freq; > int i, count; > @@ -188,25 +187,22 @@ static int set_freq_table(struct devfreq *devfreq) > if (count <= 0) > return -EINVAL; > > - profile->max_state = count; > - profile->freq_table = devm_kcalloc(devfreq->dev.parent, > - profile->max_state, > - sizeof(*profile->freq_table), > - GFP_KERNEL); > - if (!profile->freq_table) { > - profile->max_state = 0; > + devfreq->max_state = count; > + devfreq->freq_table = devm_kcalloc(devfreq->dev.parent, > + devfreq->max_state, > + sizeof(*devfreq->freq_table), > + GFP_KERNEL); > + if (!devfreq->freq_table) > return -ENOMEM; > - } > > - for (i = 0, freq = 0; i < profile->max_state; i++, freq++) { > + for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) { > opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq); > if (IS_ERR(opp)) { > - devm_kfree(devfreq->dev.parent, profile->freq_table); > - profile->max_state = 0; > + devm_kfree(devfreq->dev.parent, devfreq->freq_table); 
> return PTR_ERR(opp); > } > dev_pm_opp_put(opp); > - profile->freq_table[i] = freq; > + devfreq->freq_table[i] = freq; > } > > return 0; > @@ -246,7 +242,7 @@ int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) > > if (lev != prev_lev) { > devfreq->stats.trans_table[ > - (prev_lev * devfreq->profile->max_state) + lev]++; > + (prev_lev * devfreq->max_state) + lev]++; > devfreq->stats.total_trans++; > } > > @@ -835,6 +831,9 @@ struct devfreq *devfreq_add_device(struct device *dev, > if (err < 0) > goto err_dev; > mutex_lock(&devfreq->lock); > + } else { > + devfreq->freq_table = devfreq->profile->freq_table; > + devfreq->max_state = devfreq->profile->max_state; > } > > devfreq->scaling_min_freq = find_available_min_freq(devfreq); > @@ -870,8 +869,8 @@ struct devfreq *devfreq_add_device(struct device *dev, > > devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev, > array3_size(sizeof(unsigned int), > - devfreq->profile->max_state, > - devfreq->profile->max_state), > + devfreq->max_state, > + devfreq->max_state), > GFP_KERNEL); > if (!devfreq->stats.trans_table) { > mutex_unlock(&devfreq->lock); > @@ -880,7 +879,7 @@ struct devfreq *devfreq_add_device(struct device *dev, > } > > devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev, > - devfreq->profile->max_state, > + devfreq->max_state, > sizeof(*devfreq->stats.time_in_state), > GFP_KERNEL); > if (!devfreq->stats.time_in_state) { > @@ -1665,9 +1664,9 @@ static ssize_t available_frequencies_show(struct device *d, > > mutex_lock(&df->lock); > > - for (i = 0; i < df->profile->max_state; i++) > + for (i = 0; i < df->max_state; i++) > count += scnprintf(&buf[count], (PAGE_SIZE - count - 2), > - "%lu ", df->profile->freq_table[i]); > + "%lu ", df->freq_table[i]); > > mutex_unlock(&df->lock); > /* Truncate the trailing space */ > @@ -1690,7 +1689,7 @@ static ssize_t trans_stat_show(struct device *dev, > > if (!df->profile) > return -EINVAL; > - max_state = df->profile->max_state; > + max_state = 
df->max_state; > > if (max_state == 0) > return sprintf(buf, "Not Supported.\n"); > @@ -1707,19 +1706,17 @@ static ssize_t trans_stat_show(struct device *dev, > len += sprintf(buf + len, " :"); > for (i = 0; i < max_state; i++) > len += sprintf(buf + len, "%10lu", > - df->profile->freq_table[i]); > + df->freq_table[i]); > > len += sprintf(buf + len, " time(ms)\n"); > > for (i = 0; i < max_state; i++) { > - if (df->profile->freq_table[i] > - == df->previous_freq) { > + if (df->freq_table[i] == df->previous_freq) > len += sprintf(buf + len, "*"); > - } else { > + else > len += sprintf(buf + len, " "); > - } > - len += sprintf(buf + len, "%10lu:", > - df->profile->freq_table[i]); > + > + len += sprintf(buf + len, "%10lu:", df->freq_table[i]); > for (j = 0; j < max_state; j++) > len += sprintf(buf + len, "%10u", > df->stats.trans_table[(i * max_state) + j]); > @@ -1743,7 +1740,7 @@ static ssize_t trans_stat_store(struct device *dev, > if (!df->profile) > return -EINVAL; > > - if (df->profile->max_state == 0) > + if (df->max_state == 0) > return count; > > err = kstrtoint(buf, 10, &value); > @@ -1751,11 +1748,11 @@ static ssize_t trans_stat_store(struct device *dev, > return -EINVAL; > > mutex_lock(&df->lock); > - memset(df->stats.time_in_state, 0, (df->profile->max_state * > + memset(df->stats.time_in_state, 0, (df->max_state * > sizeof(*df->stats.time_in_state))); > memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int), > - df->profile->max_state, > - df->profile->max_state)); > + df->max_state, > + df->max_state)); > df->stats.total_trans = 0; > df->stats.last_update = get_jiffies_64(); > mutex_unlock(&df->lock); > diff --git a/drivers/devfreq/governor_passive.c b/drivers/devfreq/governor_passive.c > index 72c67979ebe1..ce24a262aa16 100644 > --- a/drivers/devfreq/governor_passive.c > +++ b/drivers/devfreq/governor_passive.c > @@ -131,18 +131,18 @@ static int get_target_freq_with_devfreq(struct devfreq *devfreq, > goto out; > > /* Use interpolation if 
required opps is not available */ > - for (i = 0; i < parent_devfreq->profile->max_state; i++) > - if (parent_devfreq->profile->freq_table[i] == *freq) > + for (i = 0; i < parent_devfreq->max_state; i++) > + if (parent_devfreq->freq_table[i] == *freq) > break; > > - if (i == parent_devfreq->profile->max_state) > + if (i == parent_devfreq->max_state) > return -EINVAL; > > - if (i < devfreq->profile->max_state) { > - child_freq = devfreq->profile->freq_table[i]; > + if (i < devfreq->max_state) { > + child_freq = devfreq->freq_table[i]; > } else { > - count = devfreq->profile->max_state; > - child_freq = devfreq->profile->freq_table[count - 1]; > + count = devfreq->max_state; > + child_freq = devfreq->freq_table[count - 1]; > } > > out: > diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h > index dc10bee75a72..34aab4dd336c 100644 > --- a/include/linux/devfreq.h > +++ b/include/linux/devfreq.h > @@ -148,6 +148,8 @@ struct devfreq_stats { > * reevaluate operable frequencies. Devfreq users may use > * devfreq.nb to the corresponding register notifier call chain. > * @work: delayed work for load monitoring. > + * @freq_table: current frequency table used by the devfreq driver. > + * @max_state: count of entry present in the frequency table. > * @previous_freq: previously configured frequency value. > * @last_status: devfreq user device info, performance statistics > * @data: Private data of the governor. The devfreq framework does not > @@ -185,6 +187,9 @@ struct devfreq { > struct notifier_block nb; > struct delayed_work work; > > + unsigned long *freq_table; > + unsigned int max_state; > + > unsigned long previous_freq; > struct devfreq_dev_status last_status; > Best regards -- Marek Szyprowski, PhD Samsung R&D Institute Poland