Hello, On Tue, Feb 20, 2024 at 07:43:17PM +0000, Biju Das wrote: > diff --git a/drivers/pwm/pwm-rzg2l-gpt.c b/drivers/pwm/pwm-rzg2l-gpt.c > new file mode 100644 > index 000000000000..0dc8163ee92b > --- /dev/null > +++ b/drivers/pwm/pwm-rzg2l-gpt.c > @@ -0,0 +1,559 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Renesas RZ/G2L General PWM Timer (GPT) driver > + * > + * Copyright (C) 2024 Renesas Electronics Corporation > + * > + * Hardware manual for this IP can be found here > + * https://www.renesas.com/eu/en/document/mah/rzg2l-group-rzg2lc-group-users-manual-hardware-0?language=en > + * > + * Limitations: > + * - Counter must be stopped before modifying Mode and Prescaler. > + * - When PWM is disabled, the output is driven to inactive. > + * - While the hardware supports both polarities, the driver (for now) > + * only handles normal polarity. > + * - When both channels are used, disabling the channel on one stops the > + * other. Do I understand right that the driver doesn't disable one channel if the other is running? > + */ > + > +#include <linux/bitfield.h> > +#include <linux/clk.h> > +#include <linux/io.h> > +#include <linux/limits.h> > +#include <linux/module.h> > +#include <linux/of.h> > +#include <linux/platform_device.h> > +#include <linux/pm_runtime.h> > +#include <linux/pwm.h> > +#include <linux/reset.h> > +#include <linux/time.h> > +#include <linux/units.h> > + > +#define RZG2L_GTCR 0x2c > +#define RZG2L_GTUDDTYC 0x30 > +#define RZG2L_GTIOR 0x34 > +#define RZG2L_GTBER 0x40 > +#define RZG2L_GTCNT 0x48 > +#define RZG2L_GTCCR(i) (0x4c + 4 * (i)) > +#define RZG2L_GTPR 0x64 > + > +#define RZG2L_GTCR_CST BIT(0) > +#define RZG2L_GTCR_MD GENMASK(18, 16) > +#define RZG2L_GTCR_TPCS GENMASK(26, 24) > + > +#define RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE FIELD_PREP(RZG2L_GTCR_MD, 0) > + > +#define RZG2L_GTUDDTYC_UP BIT(0) > +#define RZG2L_GTUDDTYC_UDF BIT(1) > +#define RZG2L_UP_COUNTING (RZG2L_GTUDDTYC_UP | RZG2L_GTUDDTYC_UDF) Would it make sense to have GTUDDTYC in the last define's name? > +#define RZG2L_GTIOR_GTIOA GENMASK(4, 0) > +#define RZG2L_GTIOR_GTIOB GENMASK(20, 16) > +#define RZG2L_GTIOR_GTIOx(a) ((a) ? RZG2L_GTIOR_GTIOB : RZG2L_GTIOR_GTIOA) > +#define RZG2L_GTIOR_OAE BIT(8) > +#define RZG2L_GTIOR_OBE BIT(24) > +#define RZG2L_GTIOR_OxE(a) ((a) ? RZG2L_GTIOR_OBE : RZG2L_GTIOR_OAE) > + > +#define RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE 0x1b > +#define RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH \ > + (RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE | RZG2L_GTIOR_OAE) > +#define RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH \ > + (FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE) | RZG2L_GTIOR_OBE) > + > +#define RZG2L_GTIOR_GTIOx_OUT_HI_END_TOGGLE_CMP_MATCH(a) \ > + ((a) ? RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH : \ > + RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH) > + > +#define RZG2L_MAX_HW_CHANNELS 8 > +#define RZG2L_CHANNELS_PER_IO 2 > +#define RZG2L_MAX_PWM_CHANNELS (RZG2L_MAX_HW_CHANNELS * RZG2L_CHANNELS_PER_IO) > +#define RZG2L_MAX_SCALE_FACTOR 1024 > + > +#define RZG2L_GET_CH(a) ((a) / 2) > + > +#define RZG2L_GET_CH_OFFS(i) (0x100 * (i)) > + > +struct rzg2l_gpt_chip { > + struct pwm_chip chip; > + void __iomem *mmio; > + struct reset_control *rstc; > + struct clk *clk; > + struct mutex lock; /* lock to protect shared channel resources */ > + unsigned long rate; > + u64 max_val; > + u32 period_cycles[RZG2L_MAX_HW_CHANNELS]; > + u32 user_count[RZG2L_MAX_HW_CHANNELS]; > + u32 enable_count[RZG2L_MAX_HW_CHANNELS]; > + DECLARE_BITMAP(ch_en_bits, RZG2L_MAX_PWM_CHANNELS); > +}; > + > +static inline unsigned int rzg2l_gpt_subchannel(unsigned int hwpwm) > +{ > + return hwpwm & 0x1; > +} > + > +static inline u64 rzg2l_gpt_mul_u64_u64_div_u64(u64 a, u64 b, u64 c) > +{ > + u64 retval; > + > + if (a > b) > + retval = mul_u64_u64_div_u64(b, a, c); > + else > + retval = mul_u64_u64_div_u64(a, b, c); With https://lore.kernel.org/lkml/20240303092408.662449-2-u.kleine-koenig@xxxxxxxxxxxxxx this function can be replaced by a direct call to mul_u64_u64_div_u64(). I expect this patch to go into v6.9-rc1 as akpm picked it up before the merge window opened. > + return retval; > +} > [...] > +static u64 calculate_period_or_duty(struct rzg2l_gpt_chip *rzg2l_gpt, u32 val, u8 prescale) > +{ > + u64 tmp, d; > + > + /* > + * Rate is in MHz and is always integer for peripheral clk > + * 2^32 * 2^10 (prescalar) * 10^9 > 2^64 > + * 2^32 * 2^10 (prescalar) * 10^6 < 2^64 > + * Multiply val with prescalar first, if the result is less than > + * 2^34, then multiply by 10^9. Otherwise divide nr and dr by 10^3 > + * so that it will never overflow. > + */ > + > + tmp = (u64)val << (2 * prescale); > + if (tmp <= (1ULL << 34)) { I would have written that as: if (tmp >> 34 == 0) (which implements tmp < (1ULL << 34), which doesn't matter much). > + tmp *= NSEC_PER_SEC; > + d = rzg2l_gpt->rate; > + } else { > + tmp *= div64_u64(NSEC_PER_SEC, KILO); I don't know if the compiler is clever enough to not calculate that every time? Also using div64_u64 is too heavy given that both values fit into an u32. > + d = div64_u64(rzg2l_gpt->rate, KILO); At first I thought you could better use 1024 as the common divisor here as it could be implemented using a shift operation. But I understood with the comment above that we're not losing precision here as both NSEC_PER_SEC and rate are a multiple of 1000. Maybe s/Rate is in MHz and is always integer for peripheral clk/Rate is a multiple of 1000000, and so dividing by 1000 is an exact operation./ ? > + } > + > + return DIV64_U64_ROUND_UP(tmp, d); > +} > + > +static int rzg2l_gpt_get_state(struct pwm_chip *chip, struct pwm_device *pwm, > + struct pwm_state *state) > +{ > + struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip); > + int rc; > + > + rc = pm_runtime_resume_and_get(chip->dev); > + if (rc) > + return rc; > + > + state->enabled = rzg2l_gpt_is_ch_enabled(rzg2l_gpt, pwm->hwpwm); > + if (state->enabled) { > + u32 ch = RZG2L_GET_CH(pwm->hwpwm); > + u32 offs = RZG2L_GET_CH_OFFS(ch); > + u8 prescale; > + u32 val; > + > + val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTCR); > + prescale = FIELD_GET(RZG2L_GTCR_TPCS, val); > + > + val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTPR); > + state->period = calculate_period_or_duty(rzg2l_gpt, val, prescale); > + > + val = rzg2l_gpt_read(rzg2l_gpt, > + offs + RZG2L_GTCCR(rzg2l_gpt_subchannel(pwm->hwpwm))); > + state->duty_cycle = calculate_period_or_duty(rzg2l_gpt, val, prescale); > + if (state->duty_cycle > state->period) > + state->duty_cycle = state->period; > + } > + > + state->polarity = PWM_POLARITY_NORMAL; > + pm_runtime_put(chip->dev); > + > + return 0; > +} > + > +static u32 rzg2l_gpt_calculate_pv_or_dc(u64 period_or_duty_cycle, u8 prescale) > +{ > + return min_t(u64, (period_or_duty_cycle + (1 << (2 * prescale)) - 1) >> (2 * prescale), > + U32_MAX); Can the addition overflow? Is the addition even right? This function is used in .apply() where it's usually right to round down. > +} > + > +/* Caller holds the lock while calling rzg2l_gpt_config() */ > +static int rzg2l_gpt_config(struct pwm_chip *chip, struct pwm_device *pwm, > + const struct pwm_state *state) > +{ > + struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip); > + u8 ch = RZG2L_GET_CH(pwm->hwpwm); > + u32 offs = RZG2L_GET_CH_OFFS(ch); > + unsigned long pv, dc; > + u64 period_cycles; > + u64 duty_cycles; > + u8 prescale; > + > + /* Limit period/duty cycle to max value supported by the HW */ > + if (state->period > rzg2l_gpt->max_val) > + period_cycles = rzg2l_gpt->max_val; > + else > + period_cycles = state->period; this is equivalent to period_cycles = min(state->period, rzg2l_gpt->max_val); Is this less readable to justify keeping the if? > + period_cycles = rzg2l_gpt_mul_u64_u64_div_u64(period_cycles, rzg2l_gpt->rate, NSEC_PER_SEC); After this operation period_cycles's unit is really hardware cycles. Before it isn't. I suggest to introduce another variable "period" for the value above. So make this read: period = min(state->period, rzg2l_gpt->max_val); period_cycles = mul_u64_u64_div_u64(period, rzg2l_gpt->rate, NSEC_PER_SEC); . This shouldn't be harder for the compiler but easier for the human reader. > + /* > + * GPT counter is shared by multiple channels, so prescale and period > + * can NOT be modified when there are multiple channels in use with > + * different settings. > + */ > + if (period_cycles < rzg2l_gpt->period_cycles[ch] && rzg2l_gpt->user_count[ch] > 1) Would it make sense to swap the checks? Technically it doesn't make a difference, but if rzg2l_gpt->user_count[ch] == 0 rzg2l_gpt->period_cycles[ch] might be an invalid value? > + return -EBUSY; > + > + prescale = rzg2l_gpt_calculate_prescale(rzg2l_gpt, period_cycles); > + pv = rzg2l_gpt_calculate_pv_or_dc(period_cycles, prescale); > + > + if (state->duty_cycle > rzg2l_gpt->max_val) > + duty_cycles = rzg2l_gpt->max_val; > + else > + duty_cycles = state->duty_cycle; > + > + duty_cycles = rzg2l_gpt_mul_u64_u64_div_u64(duty_cycles, rzg2l_gpt->rate, NSEC_PER_SEC); > + dc = rzg2l_gpt_calculate_pv_or_dc(duty_cycles, prescale); > + > + /* > + * GPT counter is shared by multiple channels, we cache the period cycles > + * from the first enabled channel and use the same value for both > + * channels. > + */ > + rzg2l_gpt->period_cycles[ch] = period_cycles; > + > + /* > + * Counter must be stopped before modifying mode, prescaler, timer > + * counter and buffer enable registers. These registers are shared > + * between both channels. So allow updating these registers only for the > + * first enabled channel. > + */ > + if (rzg2l_gpt->enable_count[ch] <= 1) > + rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_CST, 0); What happens for the second running channel here? You're still writing to the relevant registers, just without stopping the hardware. Sounds strange. > + > + /* GPT set operating mode (saw-wave up-counting) */ > + rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, RZG2L_GTCR_MD, > + RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE); > + > [...] > +static int rzg2l_gpt_apply(struct pwm_chip *chip, struct pwm_device *pwm, > + const struct pwm_state *state) > +{ > + struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip); > + bool enabled = pwm->state.enabled; > + int ret; > + > + if (state->polarity != PWM_POLARITY_NORMAL) > + return -EINVAL; > + > + if (!state->enabled) { > + if (enabled) { > + rzg2l_gpt_disable(rzg2l_gpt, pwm); > + pm_runtime_put_sync(rzg2l_gpt->chip.dev); > + } > + > + return 0; > + } > + > + if (!enabled) { > + ret = pm_runtime_resume_and_get(rzg2l_gpt->chip.dev); > + if (ret) > + return ret; > + } > + > + mutex_lock(&rzg2l_gpt->lock); > + ret = rzg2l_gpt_config(chip, pwm, state); > + mutex_unlock(&rzg2l_gpt->lock); > + if (ret) Is here a conditional pm_runtime_put_sync() missing? > + return ret; > + > + if (!enabled) > + ret = rzg2l_gpt_enable(rzg2l_gpt, pwm); and here? > + return ret; > +} > + > [...] > +static int rzg2l_gpt_probe(struct platform_device *pdev) > +{ > + struct rzg2l_gpt_chip *rzg2l_gpt; > + int ret; > + u32 i; > + > + rzg2l_gpt = devm_kzalloc(&pdev->dev, sizeof(*rzg2l_gpt), GFP_KERNEL); > + if (!rzg2l_gpt) > + return -ENOMEM; > + > + rzg2l_gpt->mmio = devm_platform_ioremap_resource(pdev, 0); > + if (IS_ERR(rzg2l_gpt->mmio)) > + return PTR_ERR(rzg2l_gpt->mmio); > + > + rzg2l_gpt->rstc = devm_reset_control_get_exclusive(&pdev->dev, NULL); > + if (IS_ERR(rzg2l_gpt->rstc)) > + return dev_err_probe(&pdev->dev, PTR_ERR(rzg2l_gpt->rstc), > + "get reset failed\n"); > + > + rzg2l_gpt->clk = devm_clk_get(&pdev->dev, NULL); > + if (IS_ERR(rzg2l_gpt->clk)) > + return dev_err_probe(&pdev->dev, PTR_ERR(rzg2l_gpt->clk), > + "cannot get clock\n"); > + > + ret = reset_control_deassert(rzg2l_gpt->rstc); > + if (ret) > + return dev_err_probe(&pdev->dev, ret, > + "cannot deassert reset control\n"); > + > + pm_runtime_enable(&pdev->dev); > + ret = pm_runtime_resume_and_get(&pdev->dev); > + if (ret) > + goto err_reset; > + > + ret = clk_rate_exclusive_get(rzg2l_gpt->clk); There is a devm variant of this function in the mean time. > + if (ret) > + goto err_pm_put; > + > + rzg2l_gpt->rate = clk_get_rate(rzg2l_gpt->clk); > + if (!rzg2l_gpt->rate) { > + ret = dev_err_probe(&pdev->dev, -EINVAL, "gpt clk rate is 0"); > + goto err_clk_rate_put; > + } > + > + /* > + * Refuse clk rates > 1 GHz to prevent overflow later for computing > + * period and duty cycle. > + */ > + if (rzg2l_gpt->rate > NSEC_PER_SEC) { > + ret = -EINVAL; Error message please. > + goto err_clk_rate_put; > + } > + > + rzg2l_gpt->max_val = div64_u64((u64)U32_MAX * NSEC_PER_SEC, > + rzg2l_gpt->rate) * RZG2L_MAX_SCALE_FACTOR; > + > + /* > + * We need to keep the clock on, in case the bootloader has enabled the > + * PWM and is running during probe(). > + */ > + for (i = 0; i < RZG2L_MAX_PWM_CHANNELS; i++) { > + if (rzg2l_gpt_is_ch_enabled(rzg2l_gpt, i)) { > + set_bit(i, rzg2l_gpt->ch_en_bits); > + pm_runtime_get_sync(&pdev->dev); > + } > + } Best regards Uwe -- Pengutronix e.K. | Uwe Kleine-König | Industrial Linux Solutions | https://www.pengutronix.de/ |
Attachment:
signature.asc
Description: PGP signature