Hi Uwe Kleine-König, > -----Original Message----- > From: Uwe Kleine-König <u.kleine-koenig@xxxxxxxxxxxxxx> > Sent: Tuesday, March 12, 2024 7:20 AM > Subject: Re: [PATCH v18 3/4] pwm: Add support for RZ/G2L GPT > > Hello, > > On Tue, Feb 20, 2024 at 07:43:17PM +0000, Biju Das wrote: > > diff --git a/drivers/pwm/pwm-rzg2l-gpt.c b/drivers/pwm/pwm-rzg2l-gpt.c > > new file mode 100644 index 000000000000..0dc8163ee92b > > --- /dev/null > > +++ b/drivers/pwm/pwm-rzg2l-gpt.c > > @@ -0,0 +1,559 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * Renesas RZ/G2L General PWM Timer (GPT) driver > > + * > > + * Copyright (C) 2024 Renesas Electronics Corporation > > + * > > + * Hardware manual for this IP can be found here > > + * > > +https://www.renesas.com/eu/en/document/mah/rzg2l-group-rzg2lc-group-u > > +sers-manual-hardware-0?language=en > > + * > > + * Limitations: > > + * - Counter must be stopped before modifying Mode and Prescaler. > > + * - When PWM is disabled, the output is driven to inactive. > > + * - While the hardware supports both polarities, the driver (for now) > > + * only handles normal polarity. > > + * - When both channels are used, disabling the channel on one stops the > > + * other. > > Do I understand right that the driver doesn't disable one channel if the other is running? Yes, that is correct. We will allow to set only duty cycle for other channel. The first enabled channel sets the shared registers and duty cycle. > > > + */ > > + > > +#include <linux/bitfield.h> > > +#include <linux/clk.h> > > +#include <linux/io.h> > > +#include <linux/limits.h> > > +#include <linux/module.h> > > +#include <linux/of.h> > > +#include <linux/platform_device.h> > > +#include <linux/pm_runtime.h> > > +#include <linux/pwm.h> > > +#include <linux/reset.h> > > +#include <linux/time.h> > > +#include <linux/units.h> > > + > > +#define RZG2L_GTCR 0x2c > > +#define RZG2L_GTUDDTYC 0x30 > > +#define RZG2L_GTIOR 0x34 > > +#define RZG2L_GTBER 0x40 > > +#define RZG2L_GTCNT 0x48 > > +#define RZG2L_GTCCR(i) (0x4c + 4 * (i)) > > +#define RZG2L_GTPR 0x64 > > + > > +#define RZG2L_GTCR_CST BIT(0) > > +#define RZG2L_GTCR_MD GENMASK(18, 16) > > +#define RZG2L_GTCR_TPCS GENMASK(26, 24) > > + > > +#define RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE FIELD_PREP(RZG2L_GTCR_MD, 0) > > + > > +#define RZG2L_GTUDDTYC_UP BIT(0) > > +#define RZG2L_GTUDDTYC_UDF BIT(1) > > +#define RZG2L_UP_COUNTING (RZG2L_GTUDDTYC_UP | RZG2L_GTUDDTYC_UDF) > > Would it make sense to have GTUDDTYC in the last define's name? Will make it RZG2L_GTUDDTYC_UP_COUNTING as RZG2L_GTUDDTYC reg offset defined above. > > > +#define RZG2L_GTIOR_GTIOA GENMASK(4, 0) > > +#define RZG2L_GTIOR_GTIOB GENMASK(20, 16) > > +#define RZG2L_GTIOR_GTIOx(a) ((a) ? RZG2L_GTIOR_GTIOB : RZG2L_GTIOR_GTIOA) > > +#define RZG2L_GTIOR_OAE BIT(8) > > +#define RZG2L_GTIOR_OBE BIT(24) > > +#define RZG2L_GTIOR_OxE(a) ((a) ? RZG2L_GTIOR_OBE : RZG2L_GTIOR_OAE) > > + > > +#define RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE 0x1b > > +#define RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH \ > > + (RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE | RZG2L_GTIOR_OAE) #define > > +RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH \ > > + (FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE) > > +| RZG2L_GTIOR_OBE) > > + > > +#define RZG2L_GTIOR_GTIOx_OUT_HI_END_TOGGLE_CMP_MATCH(a) \ > > + ((a) ? RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH : \ > > + RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH) > > + > > +#define RZG2L_MAX_HW_CHANNELS 8 > > +#define RZG2L_CHANNELS_PER_IO 2 > > +#define RZG2L_MAX_PWM_CHANNELS (RZG2L_MAX_HW_CHANNELS * RZG2L_CHANNELS_PER_IO) > > +#define RZG2L_MAX_SCALE_FACTOR 1024 > > + > > +#define RZG2L_GET_CH(a) ((a) / 2) > > + > > +#define RZG2L_GET_CH_OFFS(i) (0x100 * (i)) > > + > > +struct rzg2l_gpt_chip { > > + struct pwm_chip chip; Will remove it in next version as devm_pwmchip_alloc() will be used. > > + void __iomem *mmio; > > + struct reset_control *rstc; > > + struct clk *clk; > > + struct mutex lock; /* lock to protect shared channel resources */ > > + unsigned long rate; > > + u64 max_val; > > + u32 period_cycles[RZG2L_MAX_HW_CHANNELS]; > > + u32 user_count[RZG2L_MAX_HW_CHANNELS]; > > + u32 enable_count[RZG2L_MAX_HW_CHANNELS]; > > + DECLARE_BITMAP(ch_en_bits, RZG2L_MAX_PWM_CHANNELS); }; > > + > > +static inline unsigned int rzg2l_gpt_subchannel(unsigned int hwpwm) { > > + return hwpwm & 0x1; > > +} > > + > > +static inline u64 rzg2l_gpt_mul_u64_u64_div_u64(u64 a, u64 b, u64 c) > > +{ > > + u64 retval; > > + > > + if (a > b) > > + retval = mul_u64_u64_div_u64(b, a, c); > > + else > > + retval = mul_u64_u64_div_u64(a, b, c); > > With > https://lore.kernel.org/lkml/20240303092408.662449-2-u.kleine-koenig@xxxxxxxxxxxxxx > this function can be replaced by a direct call to mul_u64_u64_div_u64(). > I expect this patch to go into v6.9-rc1 as akpm picked it up before the merge window opened. Ok, I will hold next version until v6.9-rc1 as for-pwm-nexxt doesn't have this patch?? > > > + return retval; > > +} > > [...] > > +static u64 calculate_period_or_duty(struct rzg2l_gpt_chip *rzg2l_gpt, > > +u32 val, u8 prescale) { > > + u64 tmp, d; > > + > > + /* > > + * Rate is in MHz and is always integer for peripheral clk > > + * 2^32 * 2^10 (prescalar) * 10^9 > 2^64 > > + * 2^32 * 2^10 (prescalar) * 10^6 < 2^64 > > + * Multiply val with prescalar first, if the result is less than > > + * 2^34, then multiply by 10^9. Otherwise divide nr and dr by 10^3 > > + * so that it will never overflow. > > + */ > > + > > + tmp = (u64)val << (2 * prescale); > > + if (tmp <= (1ULL << 34)) { > > I would have written that as: > > if (tmp >> 34 == 0) > > (which implements tmp < (1ULL << 34), which doesn't matter much). > > > + tmp *= NSEC_PER_SEC; > > + d = rzg2l_gpt->rate; > > + } else { > > + tmp *= div64_u64(NSEC_PER_SEC, KILO); > > I don't know if the compiler is clever enough to not calculate that every time? Also using div64_u64 is > too heavy given that both values fit into an u32. > > > + d = div64_u64(rzg2l_gpt->rate, KILO); > > At first I thought you could better use 1024 as the common divisor here as it could be implemented > using a shift operation. But I understood with the comment above that we're not losing precision here > as both NSEC_PER_SEC and rate are a multiple of 1000. > > Maybe s/Rate is in MHz and is always integer for peripheral clk/Rate is a multiple of 1000000, and so > dividing by 1000 is an exact operation./ ? I will use rate_khz as suggested in the later thread and getrid of all above checks. + tmp = (u64)val << (2 * prescale); + tmp *= USEC_PER_SEC; + + return DIV64_U64_ROUND_UP(tmp, rzg2l_gpt->rate_khz); > > > > + } > > + > > + return DIV64_U64_ROUND_UP(tmp, d); > > +} > > + > > +static int rzg2l_gpt_get_state(struct pwm_chip *chip, struct pwm_device *pwm, > > + struct pwm_state *state) > > +{ > > + struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip); > > + int rc; > > + > > + rc = pm_runtime_resume_and_get(chip->dev); > > + if (rc) > > + return rc; > > + > > + state->enabled = rzg2l_gpt_is_ch_enabled(rzg2l_gpt, pwm->hwpwm); > > + if (state->enabled) { > > + u32 ch = RZG2L_GET_CH(pwm->hwpwm); > > + u32 offs = RZG2L_GET_CH_OFFS(ch); > > + u8 prescale; > > + u32 val; > > + > > + val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTCR); > > + prescale = FIELD_GET(RZG2L_GTCR_TPCS, val); > > + > > + val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTPR); > > + state->period = calculate_period_or_duty(rzg2l_gpt, val, prescale); > > + > > + val = rzg2l_gpt_read(rzg2l_gpt, > > + offs + RZG2L_GTCCR(rzg2l_gpt_subchannel(pwm->hwpwm))); > > + state->duty_cycle = calculate_period_or_duty(rzg2l_gpt, val, prescale); > > + if (state->duty_cycle > state->period) > > + state->duty_cycle = state->period; > > + } > > + > > + state->polarity = PWM_POLARITY_NORMAL; > > + pm_runtime_put(chip->dev); > > + > > + return 0; > > +} > > + > > +static u32 rzg2l_gpt_calculate_pv_or_dc(u64 period_or_duty_cycle, u8 > > +prescale) { > > + return min_t(u64, (period_or_duty_cycle + (1 << (2 * prescale)) - 1) >> (2 * prescale), > > + U32_MAX); > > Can the addition overflow? Is the addition even right? This function is used in .apply() where it's > usually right to round down. No, It won't overflow. The logic is proposed by you in v17 for DIV64_U64_ROUND_UP and it is passing all tests with PWM_DEBUG=y. VAL=10000 echo "#### Zero duty cycle ###" echo 0 > /sys/class/pwm/$PWMCHIP/pwm${IO_1}/duty_cycle echo "#### decrement Period ###" for i in $(seq ${VAL} -1 1); do echo "#### Increment Period ###" for i in $(seq 1 ${VAL}); do echo "#### decrement duty cycle ###" for i in $(seq ${VAL} -1 1); do echo "#### Increment duty cycle ###" for i in $(seq 1 ${VAL}); do > > > +} > > + > > +/* Caller holds the lock while calling rzg2l_gpt_config() */ static > > +int rzg2l_gpt_config(struct pwm_chip *chip, struct pwm_device *pwm, > > + const struct pwm_state *state) { > > + struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip); > > + u8 ch = RZG2L_GET_CH(pwm->hwpwm); > > + u32 offs = RZG2L_GET_CH_OFFS(ch); > > + unsigned long pv, dc; > > + u64 period_cycles; > > + u64 duty_cycles; > > + u8 prescale; > > + > > + /* Limit period/duty cycle to max value supported by the HW */ > > + if (state->period > rzg2l_gpt->max_val) > > + period_cycles = rzg2l_gpt->max_val; > > + else > > + period_cycles = state->period; > > this is equivalent to > > period_cycles = min(state->period, rzg2l_gpt->max_val); > > Is this less readable to justify keeping the if? Agreed. > > > + period_cycles = rzg2l_gpt_mul_u64_u64_div_u64(period_cycles, > > +rzg2l_gpt->rate, NSEC_PER_SEC); > > After this operation period_cycles's unit is really hardware cycles. > Before it isn't. I suggest to introduce another variable "period" for the value above. So make this > read: > > period = min(state->period, rzg2l_gpt->max_val); > period_cycles = mul_u64_u64_div_u64(period, rzg2l_gpt->rate, NSEC_PER_SEC); > > . This shouldn't be harder for the compiler but easier for the human reader. Agreed. Will use period, duty_cycle, period_cycles, duty_cycles variables and later two variables are for hardware cycles. > > > + /* > > + * GPT counter is shared by multiple channels, so prescale and period > > + * can NOT be modified when there are multiple channels in use with > > + * different settings. > > + */ > > + if (period_cycles < rzg2l_gpt->period_cycles[ch] && > > +rzg2l_gpt->user_count[ch] > 1) > > Would it make sense to swap the checks? Technically it doesn't make a difference, but if rzg2l_gpt- > >user_count[ch] == 0 rzg2l_gpt->period_cycles[ch] might be an invalid value? Agreed. > > > + return -EBUSY; > > + > > + prescale = rzg2l_gpt_calculate_prescale(rzg2l_gpt, period_cycles); > > + pv = rzg2l_gpt_calculate_pv_or_dc(period_cycles, prescale); > > + > > + if (state->duty_cycle > rzg2l_gpt->max_val) > > + duty_cycles = rzg2l_gpt->max_val; > > + else > > + duty_cycles = state->duty_cycle; > > + > > + duty_cycles = rzg2l_gpt_mul_u64_u64_div_u64(duty_cycles, rzg2l_gpt->rate, NSEC_PER_SEC); > > + dc = rzg2l_gpt_calculate_pv_or_dc(duty_cycles, prescale); > > + > > + /* > > + * GPT counter is shared by multiple channels, we cache the period cycles > > + * from the first enabled channel and use the same value for both > > + * channels. > > + */ > > + rzg2l_gpt->period_cycles[ch] = period_cycles; > > + > > + /* > > + * Counter must be stopped before modifying mode, prescaler, timer > > + * counter and buffer enable registers. These registers are shared > > + * between both channels. So allow updating these registers only for the > > + * first enabled channel. > > + */ > > + if (rzg2l_gpt->enable_count[ch] <= 1) > > + rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_CST, 0); > > What happens for the second running channel here? You're still writing to the relevant registers, just > without stopping the hardware. Sounds strange. It is a mistake. I will add check for shared registers, so that it won't set by the second running channel. > > > + > > + /* GPT set operating mode (saw-wave up-counting) */ > > + rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_MD, > > + RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE); > > + > > [...] > > +static int rzg2l_gpt_apply(struct pwm_chip *chip, struct pwm_device *pwm, > > + const struct pwm_state *state) > > +{ > > + struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip); > > + bool enabled = pwm->state.enabled; > > + int ret; > > + > > + if (state->polarity != PWM_POLARITY_NORMAL) > > + return -EINVAL; > > + > > + if (!state->enabled) { > > + if (enabled) { > > + rzg2l_gpt_disable(rzg2l_gpt, pwm); > > + pm_runtime_put_sync(rzg2l_gpt->chip.dev); > > + } > > + > > + return 0; > > + } > > + > > + if (!enabled) { > > + ret = pm_runtime_resume_and_get(rzg2l_gpt->chip.dev); > > + if (ret) > > + return ret; > > + } > > + > > + mutex_lock(&rzg2l_gpt->lock); > > + ret = rzg2l_gpt_config(chip, pwm, state); > > + mutex_unlock(&rzg2l_gpt->lock); > > + if (ret) > > Is here a conditional pm_runtime_put_sync() missing? Will fix it. > > > + return ret; > > + > > + if (!enabled) > > + ret = rzg2l_gpt_enable(rzg2l_gpt, pwm); > > and here? OK. > > > + return ret; > > +} > > + > > [...] > > +static int rzg2l_gpt_probe(struct platform_device *pdev) { > > + struct rzg2l_gpt_chip *rzg2l_gpt; > > + int ret; > > + u32 i; > > + > > + rzg2l_gpt = devm_kzalloc(&pdev->dev, sizeof(*rzg2l_gpt), GFP_KERNEL); > > + if (!rzg2l_gpt) > > + return -ENOMEM; > > + > > + rzg2l_gpt->mmio = devm_platform_ioremap_resource(pdev, 0); > > + if (IS_ERR(rzg2l_gpt->mmio)) > > + return PTR_ERR(rzg2l_gpt->mmio); > > + > > + rzg2l_gpt->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); > > + if (IS_ERR(rzg2l_gpt->rstc)) > > + return dev_err_probe(&pdev->dev, PTR_ERR(rzg2l_gpt->rstc), > > + "get reset failed\n"); > > + > > + rzg2l_gpt->clk = devm_clk_get(&pdev->dev, NULL); > > + if (IS_ERR(rzg2l_gpt->clk)) > > + return dev_err_probe(&pdev->dev, PTR_ERR(rzg2l_gpt->clk), > > + "cannot get clock\n"); > > + > > + ret = reset_control_deassert(rzg2l_gpt->rstc); > > + if (ret) > > + return dev_err_probe(&pdev->dev, ret, > > + "cannot deassert reset control\n"); > > + > > + pm_runtime_enable(&pdev->dev); > > + ret = pm_runtime_resume_and_get(&pdev->dev); > > + if (ret) > > + goto err_reset; > > + > > + ret = clk_rate_exclusive_get(rzg2l_gpt->clk); > > There is a devm variant of this function in the mean time. OK, currently for testing I picked it from next. > > > + if (ret) > > + goto err_pm_put; > > + > > + rzg2l_gpt->rate = clk_get_rate(rzg2l_gpt->clk); > > + if (!rzg2l_gpt->rate) { > > + ret = dev_err_probe(&pdev->dev, -EINVAL, "gpt clk rate is 0"); > > + goto err_clk_rate_put; > > + } > > + > > + /* > > + * Refuse clk rates > 1 GHz to prevent overflow later for computing > > + * period and duty cycle. > > + */ > > + if (rzg2l_gpt->rate > NSEC_PER_SEC) { > > + ret = -EINVAL; > > Error message please. OK. Other than this, I will use the below changes in next version 1) devm_pwmchip_alloc() 2) use a local variable dev to replace &pdev->dev in probe() in the next version. 3) Also will add below check in probe as you suggested in later thread. + /* + * Rate is in MHz and is always integer for peripheral clk + * 2^32 * 2^10 (prescalar) * 10^6 (rate_khz) < 2^64 + * So make sure rate is multiple of 1000. + */ + rzg2l_gpt->rate_khz = rate / KILO; + if (rzg2l_gpt->rate_khz * KILO != rate) { + ret = dev_err_probe(dev, -EINVAL, "rate is not multiple of 1000"); + goto err_pm_put; + } Cheers, Biju