Hi Uwe, Thanks for the feedback. > -----Original Message----- > From: Uwe Kleine-König <u.kleine-koenig@xxxxxxxxxxxxxx> > Sent: Wednesday, December 6, 2023 6:38 PM > Subject: Re: [PATCH v17 3/4] pwm: Add support for RZ/G2L GPT > > Hello Biju, > > On Mon, Nov 20, 2023 at 11:33:06AM +0000, Biju Das wrote: > > diff --git a/drivers/pwm/pwm-rzg2l-gpt.c b/drivers/pwm/pwm-rzg2l-gpt.c > > new file mode 100644 index 000000000000..428e6e577db6 > > --- /dev/null > > +++ b/drivers/pwm/pwm-rzg2l-gpt.c > > @@ -0,0 +1,572 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +/* > > + * Renesas RZ/G2L General PWM Timer (GPT) driver > > + * > > + * Copyright (C) 2023 Renesas Electronics Corporation > > + * > > + * Hardware manual for this IP can be found here > > + * > > +https://www.renesas.com/eu/en/document/mah/rzg2l-group-rzg2lc-group-u > > +sers-manual-hardware-0?language=en > > + * > > + * Limitations: > > + * - Counter must be stopped before modifying Mode and Prescaler. > > + * - When PWM is disabled, the output is driven to inactive. > > + * - While the hardware supports both polarities, the driver (for now) > > + * only handles normal polarity. > > + * - When both channels are used, disabling the channel on one stops > the > > + * other. > > Just for my understanding: The combination of the first and the last item > means you cannot update Moed and Prescaler for one channel without > affecting the other one, right? Yes that is correct. > > > + */ > > + > > +#include <linux/bitfield.h> > > +#include <linux/clk.h> > > +#include <linux/io.h> > > +#include <linux/limits.h> > > +#include <linux/module.h> > > +#include <linux/of.h> > > +#include <linux/platform_device.h> > > +#include <linux/pm_runtime.h> > > +#include <linux/pwm.h> > > +#include <linux/reset.h> > > +#include <linux/time.h> > > + > > +#define RZG2L_GTCR 0x2c > > +#define RZG2L_GTUDDTYC 0x30 > > +#define RZG2L_GTIOR 0x34 > > +#define RZG2L_GTBER 0x40 > > +#define RZG2L_GTCNT 0x48 > > +#define RZG2L_GTCCRA 0x4c > > +#define RZG2L_GTCCRB 0x50 > > +#define RZG2L_GTPR 0x64 > > + > > +#define RZG2L_GTCR_CST BIT(0) > > +#define RZG2L_GTCR_MD GENMASK(18, 16) > > +#define RZG2L_GTCR_TPCS GENMASK(26, 24) > > + > > +#define RZG2L_GTCR_MD_SAW_WAVE_PWM_MODE FIELD_PREP(RZG2L_GTCR_MD, > 0) > > + > > +#define RZG2L_GTUDDTYC_UP BIT(0) > > +#define RZG2L_GTUDDTYC_UDF BIT(1) > > +#define RZG2L_UP_COUNTING (RZG2L_GTUDDTYC_UP | RZG2L_GTUDDTYC_UDF) > > + > > +#define RZG2L_GTIOR_GTIOA GENMASK(4, 0) > > +#define RZG2L_GTIOR_GTIOB GENMASK(20, 16) > > +#define RZG2L_GTIOR_OAE BIT(8) > > +#define RZG2L_GTIOR_OBE BIT(24) > > + > > +#define RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE 0x07 > > +#define RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE 0x1b > > + > > +#define RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH \ > > + (RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE | RZG2L_GTIOR_OAE) #define > > +RZG2L_GTIOR_GTIOA_OUT_LO_END_TOGGLE_CMP_MATCH \ > > + (RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE | RZG2L_GTIOR_OAE) #define > > +RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH \ > > + (FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_HI_OUT_HI_END_TOGGLE) > > +| RZG2L_GTIOR_OBE) #define > RZG2L_GTIOR_GTIOB_OUT_LO_END_TOGGLE_CMP_MATCH \ > > + (FIELD_PREP(RZG2L_GTIOR_GTIOB, RZG2L_INIT_OUT_LO_OUT_LO_END_TOGGLE) > > +| RZG2L_GTIOR_OBE) > > These are not all used. Is it sensible to still keep them? I will remove unused ones. > > > +#define RZG2L_GTCCR(i) (0x4c + 4 * (i)) > > For i = 0 this is RZG2L_GTCCRA, for i = 1 it's RZG2L_GTCCRB. Als > RZG2L_GTCCRA and RZG2L_GTCCRB are unused. Maybe move the definition of > RZG2L_GTCCR to the list of registers above? Agreed. > > > +#define RZG2L_MAX_HW_CHANNELS 8 > > +#define RZG2L_CHANNELS_PER_IO 2 > > +#define RZG2L_MAX_PWM_CHANNELS (RZG2L_MAX_HW_CHANNELS * > RZG2L_CHANNELS_PER_IO) > > +#define RZG2L_MAX_SCALE_FACTOR 1024 > > + > > +#define RZG2L_IS_IOB(a) ((a) & 0x1) > > +#define RZG2L_GET_CH(a) ((a) / 2) > > + > > +#define RZG2L_GET_CH_OFFS(i) (0x100 * (i)) > > + > > +struct rzg2l_gpt_chip { > > + struct pwm_chip chip; > > + void __iomem *mmio; > > + struct reset_control *rstc; > > + struct clk *clk; > > + struct mutex lock; /* lock to protect shared channel resources */ > > + unsigned long rate; > > + u64 max_val; > > + u32 state_period[RZG2L_MAX_HW_CHANNELS]; > > + u32 user_count[RZG2L_MAX_HW_CHANNELS]; > > + u32 enable_count[RZG2L_MAX_HW_CHANNELS]; > > + DECLARE_BITMAP(ch_en_bits, RZG2L_MAX_PWM_CHANNELS); }; > > + > > [...] > > + > > +static bool rzg2l_gpt_is_ch_enabled(struct rzg2l_gpt_chip *rzg2l_gpt, > > +u8 hwpwm) { > > + u8 ch = RZG2L_GET_CH(hwpwm); > > + u32 offs = RZG2L_GET_CH_OFFS(ch); > > + bool is_counter_running, is_output_en; > > + u32 val; > > + > > + val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTCR); > > + is_counter_running = val & RZG2L_GTCR_CST; > > + > > + val = rzg2l_gpt_read(rzg2l_gpt, offs + RZG2L_GTIOR); > > + if (RZG2L_IS_IOB(hwpwm)) > > + is_output_en = val & RZG2L_GTIOR_OBE; > > + else > > + is_output_en = val & RZG2L_GTIOR_OAE; > > IIUC the i in RZG2L_GTCCR(i) stands for either A (0) or B (1), right? > Maybe define RZG2L_GTIOR_OxE(i) assuming this is about the same A and B > here? Then this could be simplified to: > > is_output_en = val & RZG2L_GTIOR_OxE(rzg2l_gpt_subchannel(hwpwm)); > > (maybe modulo better names). OK. > > > + return (is_counter_running && is_output_en); > > You could return early after knowing that is_counter_running is false > without determining is_output_en. Agreed. > > > +} > > + > > +static int rzg2l_gpt_enable(struct rzg2l_gpt_chip *rzg2l_gpt, > > + struct pwm_device *pwm) > > +{ > > + u8 ch = RZG2L_GET_CH(pwm->hwpwm); > > + u32 offs = RZG2L_GET_CH_OFFS(ch); > > + > > + /* Enable pin output */ > > + if (RZG2L_IS_IOB(pwm->hwpwm)) > > + rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR, > > + RZG2L_GTIOR_GTIOB | RZG2L_GTIOR_OBE, > > + RZG2L_GTIOR_GTIOB_OUT_HI_END_TOGGLE_CMP_MATCH); > > + else > > + rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTIOR, > > + RZG2L_GTIOR_GTIOA | RZG2L_GTIOR_OAE, > > + RZG2L_GTIOR_GTIOA_OUT_HI_END_TOGGLE_CMP_MATCH); > > Similar suggestion as above for A and B?! OK. > > > + mutex_lock(&rzg2l_gpt->lock); > > + if (!rzg2l_gpt->enable_count[ch]) > > + rzg2l_gpt_modify(rzg2l_gpt, offs + RZG2L_GTCR, 0, > RZG2L_GTCR_CST); > > + > > + rzg2l_gpt->enable_count[ch]++; > > + mutex_unlock(&rzg2l_gpt->lock); > > + > > + return 0; > > +} > > + > > [...] > > + > > +static u64 calculate_period_or_duty(struct rzg2l_gpt_chip *rzg2l_gpt, > > +u32 val, u8 prescale) { > > + u64 retval; > > + u64 tmp; > > + > > + tmp = NSEC_PER_SEC * (u64)val; > > + /* > > + * To avoid losing precision for smaller period/duty cycle values > > + * ((2^32 * 10^9 << 2) < 2^64), do not process the rounded values. > > + */ > > + if (prescale < 2) > > + retval = DIV64_U64_ROUND_UP(tmp << (2 * prescale), rzg2l_gpt- > >rate); > > + else > > + retval = DIV64_U64_ROUND_UP(tmp, rzg2l_gpt->rate) << (2 * > > +prescale); > > Maybe introduce a mul_u64_u64_div64_roundup (similar to > mul_u64_u64_div64) to also be exact for prescale >= 2? mul_u64_u64_div_u64()has a bug already and we are losing precision with very high values. See the output with CONFIG_PWM_DEBUG enabled[1]. So we cannot reuse mul_u64_u64_div64() for roundup operation. Maybe we need to come with 128bit operations for mul_u64_u64_div64_roundup(). Do you have any specific algorithm in mind for doing mul_u64_u64_div64_roundup()? Fabrizio already developed an algorithm for 128 bit operation, I could reuse once it hits the mainline. [1] ######[ 304.213944] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent (ena=1 pol=0 5500000000000/43980352512000) -> (ena=1 pol=0 5500000000000/43980239923200) High setting## [ 304.230854] pwm-rzg2l-gpt 10048000.pwm: .apply is not idempotent (ena=1 pol=0 23980465100800/43980352512000) -> (ena=1 pol=0 23980465100800/43980239923200) > > With prescale <= 5 and rzg2l_gpt->rate >= 1024 this shouldn't work just > fine. Practically this is not possible. Yes, from theoretical point, rate=1kHz compared to the practical case, 100MHz won't work. For the practical case, the current logic is fine. If we need to achieve theoretical example like you mentioned then we need to have mul_u64_u64_div64_roundup(). > > > + return retval; > > +} > > + > > [...] > > +static u32 rzg2l_gpt_calculate_pv_or_dc(u64 period_or_duty_cycle, u8 > > +prescale) { > > + return min_t(u64, DIV64_U64_ROUND_UP(period_or_duty_cycle, 1 << (2 * > prescale)), > > + (u64)U32_MAX); > > DIV64_U64_ROUND_UP(period_or_duty_cycle, 1 << (2 * prescale)) can be > calculated a bit cheaper by using: > > (period_or_duty_cycle + 1 << (2 * prescale) - 1) >> (2 * prescale) > > assuming the LHS doesn't overflow. Need to check this. > > When using min_t, you can drop the cast for U32_MAX. OK. > > > +} > > + > > +/* Caller holds the lock while calling rzg2l_gpt_config() */ static > > +int rzg2l_gpt_config(struct pwm_chip *chip, struct pwm_device *pwm, > > + const struct pwm_state *state) { > > + struct rzg2l_gpt_chip *rzg2l_gpt = to_rzg2l_gpt_chip(chip); > > + u8 ch = RZG2L_GET_CH(pwm->hwpwm); > > + u32 offs = RZG2L_GET_CH_OFFS(ch); > > + unsigned long pv, dc; > > + u64 period_cycles; > > + u64 duty_cycles; > > + u8 prescale; > > + > > + /* > > + * GPT counter is shared by multiple channels, so prescale and > period > > + * can NOT be modified when there are multiple channels in use with > > + * different settings. > > + */ > > + if (state->period != rzg2l_gpt->state_period[ch] && rzg2l_gpt- > >user_count[ch] > 1) > > + return -EBUSY; > > if (state->period < rzg2l_gpt->state_period[ch] && rzg2l_gpt- > >user_count[ch] > 1) > > would be more forgiving and still correct. OK. > > > + /* Limit period/duty cycle to max value supported by the HW */ > > + if (state->period > rzg2l_gpt->max_val) > > + period_cycles = rzg2l_gpt->max_val; > > + else > > + period_cycles = state->period; > > + > > + period_cycles = mul_u64_u32_div(period_cycles, rzg2l_gpt->rate, > NSEC_PER_SEC); > > + prescale = rzg2l_gpt_calculate_prescale(rzg2l_gpt, period_cycles); > > + > > + pv = rzg2l_gpt_calculate_pv_or_dc(period_cycles, prescale); > > + > > + if (state->duty_cycle > rzg2l_gpt->max_val) > > + duty_cycles = rzg2l_gpt->max_val; > > + else > > + duty_cycles = state->duty_cycle; > > + > > + duty_cycles = mul_u64_u32_div(duty_cycles, rzg2l_gpt->rate, > NSEC_PER_SEC); > > + dc = rzg2l_gpt_calculate_pv_or_dc(duty_cycles, prescale); > > + > > + /* > > + * GPT counter is shared by multiple channels, we cache the period > value > > + * from the first enabled channel and use the same value for both > > + * channels. > > + */ > > + rzg2l_gpt->state_period[ch] = state->period; > > With the rounding that happens above, I think it would be more approriate > to track period_cycles here instead of period. OK. > > > [...] > > + rzg2l_gpt->clk = devm_clk_get(&pdev->dev, NULL); > > + if (IS_ERR(rzg2l_gpt->clk)) > > + return dev_err_probe(&pdev->dev, PTR_ERR(rzg2l_gpt->clk), > > + "cannot get clock\n"); > > + > > + ret = reset_control_deassert(rzg2l_gpt->rstc); > > + if (ret) > > + return dev_err_probe(&pdev->dev, ret, > > + "cannot deassert reset control\n"); > > + > > + ret = clk_prepare_enable(rzg2l_gpt->clk); > > + if (ret) > > + goto err_reset; > > devm_clk_get_enabled()? OK. > > > [...] > > + /* > > + * Refuse clk rates > 1 GHz to prevent overflow later for computing > > + * period and duty cycle. > > + */ > > + if (rzg2l_gpt->rate > NSEC_PER_SEC) { > > + ret = -EINVAL; > > + goto err_clk_rate_put; > > + } > > + > > + rzg2l_gpt->max_val = mul_u64_u64_div_u64(U32_MAX, NSEC_PER_SEC, > > + rzg2l_gpt->rate) * > RZG2L_MAX_SCALE_FACTOR; > > Is it clear that this won't overflow? It won't over flow as [2] < [3] (2^32 * 10 ^9 % 100 * 10^6) * 1024. 2^32 * 10 ^9 = 4,294,967,296,000,000,000 --[2] 2^64 = 18,446,744,073,709,551,616--[3] Cheers, Biju > > > + /* > > + * We need to keep the clock on, in case the bootloader has enabled > the > > + * PWM and is running during probe(). > > + */ >