RE: FAILED: patch "[PATCH] ice: fix concurrent reset and removal of VFs" failed to apply to 5.15-stable tree

"Keller, Jacob E" <jacob.e.keller@xxxxxxxxx> · Fri, 25 Feb 2022 19:49:28 +0000

> -----Original Message-----
> From: gregkh@xxxxxxxxxxxxxxxxxxx <gregkh@xxxxxxxxxxxxxxxxxxx>
> Sent: Friday, February 25, 2022 7:25 AM
> To: Keller, Jacob E <jacob.e.keller@xxxxxxxxx>; Nguyen, Anthony L
> <anthony.l.nguyen@xxxxxxxxx>; Jankowski, Konrad0
> <konrad0.jankowski@xxxxxxxxx>
> Cc: stable@xxxxxxxxxxxxxxx
> Subject: FAILED: patch "[PATCH] ice: fix concurrent reset and removal of VFs"
> failed to apply to 5.15-stable tree
> 
> 
> The patch below does not apply to the 5.15-stable tree.
> If someone wants it applied there, or to any other stable or longterm
> tree, then please email the backport, including the original git commit
> id to <stable@xxxxxxxxxxxxxxx>.
> 
> thanks,
> 
> greg k-h
> 

I think this fix is important. It turns out to depend on another important fix: e6ba5273d4ed ("ice: Fix race conditions between virtchnl handling and VF ndo ops").

I will shortly submit both of these fixes together, after I confirm that it applies cleanly.

Thanks,
Jake

> ------------------ original commit in Linus's tree ------------------
> 
> From fadead80fe4c033b5e514fcbadd20b55c4494112 Mon Sep 17 00:00:00 2001
> From: Jacob Keller <jacob.e.keller@xxxxxxxxx>
> Date: Mon, 7 Feb 2022 10:23:29 -0800
> Subject: [PATCH] ice: fix concurrent reset and removal of VFs
> 
> Commit c503e63200c6 ("ice: Stop processing VF messages during teardown")
> introduced a driver state flag, ICE_VF_DEINIT_IN_PROGRESS, which is
> intended to prevent some issues with concurrently handling messages from
> VFs while tearing down the VFs.
> 
> This change was motivated by crashes caused while tearing down and
> bringing up VFs in rapid succession.
> 
> It turns out that the fix actually introduces issues with the VF driver
> caused because the PF no longer responds to any messages sent by the VF
> during its .remove routine. This results in the VF potentially removing
> its DMA memory before the PF has shut down the device queues.
> 
> Additionally, the fix doesn't actually resolve concurrency issues within
> the ice driver. It is possible for a VF to initiate a reset just prior
> to the ice driver removing VFs. This can result in the remove task
> concurrently operating while the VF is being reset. This results in
> similar memory corruption and panics purportedly fixed by that commit.
> 
> Fix this concurrency at its root by protecting both the reset and
> removal flows using the existing VF cfg_lock. This ensures that we
> cannot remove the VF while any outstanding critical tasks such as a
> virtchnl message or a reset are occurring.
> 
> This locking change also fixes the root cause originally fixed by commit
> c503e63200c6 ("ice: Stop processing VF messages during teardown"), so we
> can simply revert it.
> 
> Note that I kept these two changes together because simply reverting the
> original commit alone would leave the driver vulnerable to worse race
> conditions.
> 
> Fixes: c503e63200c6 ("ice: Stop processing VF messages during teardown")
> Signed-off-by: Jacob Keller <jacob.e.keller@xxxxxxxxx>
> Tested-by: Konrad Jankowski <konrad0.jankowski@xxxxxxxxx>
> Signed-off-by: Tony Nguyen <anthony.l.nguyen@xxxxxxxxx>
> 
> diff --git a/drivers/net/ethernet/intel/ice/ice.h
> b/drivers/net/ethernet/intel/ice/ice.h
> index a9fa701aaa95..473b1f6be9de 100644
> --- a/drivers/net/ethernet/intel/ice/ice.h
> +++ b/drivers/net/ethernet/intel/ice/ice.h
> @@ -280,7 +280,6 @@ enum ice_pf_state {
>  	ICE_VFLR_EVENT_PENDING,
>  	ICE_FLTR_OVERFLOW_PROMISC,
>  	ICE_VF_DIS,
> -	ICE_VF_DEINIT_IN_PROGRESS,
>  	ICE_CFG_BUSY,
>  	ICE_SERVICE_SCHED,
>  	ICE_SERVICE_DIS,
> diff --git a/drivers/net/ethernet/intel/ice/ice_main.c
> b/drivers/net/ethernet/intel/ice/ice_main.c
> index 17a9bb461dc3..f3c346e13b7a 100644
> --- a/drivers/net/ethernet/intel/ice/ice_main.c
> +++ b/drivers/net/ethernet/intel/ice/ice_main.c
> @@ -1799,7 +1799,9 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
>  				 * reset, so print the event prior to reset.
>  				 */
>  				ice_print_vf_rx_mdd_event(vf);
> +				mutex_lock(&pf->vf[i].cfg_lock);
>  				ice_reset_vf(&pf->vf[i], false);
> +				mutex_unlock(&pf->vf[i].cfg_lock);
>  			}
>  		}
>  	}
> diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
> b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
> index 39b80124d282..408f78e3eb13 100644
> --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
> +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
> @@ -500,8 +500,6 @@ void ice_free_vfs(struct ice_pf *pf)
>  	struct ice_hw *hw = &pf->hw;
>  	unsigned int tmp, i;
> 
> -	set_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state);
> -
>  	if (!pf->vf)
>  		return;
> 
> @@ -519,22 +517,26 @@ void ice_free_vfs(struct ice_pf *pf)
>  	else
>  		dev_warn(dev, "VFs are assigned - not disabling SR-IOV\n");
> 
> -	/* Avoid wait time by stopping all VFs at the same time */
> -	ice_for_each_vf(pf, i)
> -		ice_dis_vf_qs(&pf->vf[i]);
> -
>  	tmp = pf->num_alloc_vfs;
>  	pf->num_qps_per_vf = 0;
>  	pf->num_alloc_vfs = 0;
>  	for (i = 0; i < tmp; i++) {
> -		if (test_bit(ICE_VF_STATE_INIT, pf->vf[i].vf_states)) {
> +		struct ice_vf *vf = &pf->vf[i];
> +
> +		mutex_lock(&vf->cfg_lock);
> +
> +		ice_dis_vf_qs(vf);
> +
> +		if (test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
>  			/* disable VF qp mappings and set VF disable state */
> -			ice_dis_vf_mappings(&pf->vf[i]);
> -			set_bit(ICE_VF_STATE_DIS, pf->vf[i].vf_states);
> -			ice_free_vf_res(&pf->vf[i]);
> +			ice_dis_vf_mappings(vf);
> +			set_bit(ICE_VF_STATE_DIS, vf->vf_states);
> +			ice_free_vf_res(vf);
>  		}
> 
> -		mutex_destroy(&pf->vf[i].cfg_lock);
> +		mutex_unlock(&vf->cfg_lock);
> +
> +		mutex_destroy(&vf->cfg_lock);
>  	}
> 
>  	if (ice_sriov_free_msix_res(pf))
> @@ -570,7 +572,6 @@ void ice_free_vfs(struct ice_pf *pf)
>  				i);
> 
>  	clear_bit(ICE_VF_DIS, pf->state);
> -	clear_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state);
>  	clear_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
>  }
> 
> @@ -1498,6 +1499,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
>  	ice_for_each_vf(pf, v) {
>  		vf = &pf->vf[v];
> 
> +		mutex_lock(&vf->cfg_lock);
> +
>  		vf->driver_caps = 0;
>  		ice_vc_set_default_allowlist(vf);
> 
> @@ -1512,6 +1515,8 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
>  		ice_vf_pre_vsi_rebuild(vf);
>  		ice_vf_rebuild_vsi(vf);
>  		ice_vf_post_vsi_rebuild(vf);
> +
> +		mutex_unlock(&vf->cfg_lock);
>  	}
> 
>  	if (ice_is_eswitch_mode_switchdev(pf))
> @@ -1562,6 +1567,8 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
>  	u32 reg;
>  	int i;
> 
> +	lockdep_assert_held(&vf->cfg_lock);
> +
>  	dev = ice_pf_to_dev(pf);
> 
>  	if (test_bit(ICE_VF_RESETS_DISABLED, pf->state)) {
> @@ -2061,9 +2068,12 @@ void ice_process_vflr_event(struct ice_pf *pf)
>  		bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
>  		/* read GLGEN_VFLRSTAT register to find out the flr VFs */
>  		reg = rd32(hw, GLGEN_VFLRSTAT(reg_idx));
> -		if (reg & BIT(bit_idx))
> +		if (reg & BIT(bit_idx)) {
>  			/* GLGEN_VFLRSTAT bit will be cleared in ice_reset_vf */
> +			mutex_lock(&vf->cfg_lock);
>  			ice_reset_vf(vf, true);
> +			mutex_unlock(&vf->cfg_lock);
> +		}
>  	}
>  }
> 
> @@ -2140,7 +2150,9 @@ ice_vf_lan_overflow_event(struct ice_pf *pf, struct
> ice_rq_event_info *event)
>  	if (!vf)
>  		return;
> 
> +	mutex_lock(&vf->cfg_lock);
>  	ice_vc_reset_vf(vf);
> +	mutex_unlock(&vf->cfg_lock);
>  }
> 
>  /**
> @@ -4625,10 +4637,6 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct
> ice_rq_event_info *event)
>  	struct device *dev;
>  	int err = 0;
> 
> -	/* if de-init is underway, don't process messages from VF */
> -	if (test_bit(ICE_VF_DEINIT_IN_PROGRESS, pf->state))
> -		return;
> -
>  	dev = ice_pf_to_dev(pf);
>  	if (ice_validate_vf_id(pf, vf_id)) {
>  		err = -EINVAL;