Re: [PATCH] USB: Revert "cdc-wdm: fix "out-of-sync" due to missing notifications"

Daniele Palmas <dnlplm@xxxxxxxxx> · Fri, 21 Apr 2017 17:15:05 +0200

Hi Bjørn,

2017-04-21 10:01 GMT+02:00 Bjørn Mork <bjorn@xxxxxxx>:
> This reverts commit 833415a3e781 ("cdc-wdm: fix "out-of-sync" due to
> missing notifications")
>
> There have been several reports of wdm_read returning unexpected EIO
> errors with QMI devices using the qmi_wwan driver. The reporters
> confirm that reverting prevents these errors. I have been unable to
> reproduce the bug myself, and have no explanation to offer either. But
> reverting is the safe choice here, given that the commit was an
> attempt to work around a firmware problem.  Living with a firmware
> problem is still better than adding driver bugs.
>
> Reported-by: Kasper Holtze <kasper@xxxxxxxxx>
> Reported-by: Aleksander Morgado <aleksander@xxxxxxxxxxxxx>
> Reported-by: Daniele Palmas <dnlplm@xxxxxxxxx>
> Cc: <stable@xxxxxxxxxxxxxxx> # v4.9+
> Fixes: 833415a3e781 ("cdc-wdm: fix "out-of-sync" due to missing notifications")
> Signed-off-by: Bjørn Mork <bjorn@xxxxxxx>
> ---

Tested with a variety of Telit modems with recent and older QC
chipsets, verified working fine on my side.

Thanks a lot,
Daniele

>  drivers/usb/class/cdc-wdm.c | 103 ++------------------------------------------
>  1 file changed, 4 insertions(+), 99 deletions(-)
>
> diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c
> index 8fda45a45bd3..08669fee6d7f 100644
> --- a/drivers/usb/class/cdc-wdm.c
> +++ b/drivers/usb/class/cdc-wdm.c
> @@ -58,7 +58,6 @@ MODULE_DEVICE_TABLE (usb, wdm_ids);
>  #define WDM_SUSPENDING         8
>  #define WDM_RESETTING          9
>  #define WDM_OVERFLOW           10
> -#define WDM_DRAIN_ON_OPEN      11
>
>  #define WDM_MAX                        16
>
> @@ -182,7 +181,7 @@ static void wdm_in_callback(struct urb *urb)
>                                 "nonzero urb status received: -ESHUTDOWN\n");
>                         goto skip_error;
>                 case -EPIPE:
> -                       dev_dbg(&desc->intf->dev,
> +                       dev_err(&desc->intf->dev,
>                                 "nonzero urb status received: -EPIPE\n");
>                         break;
>                 default:
> @@ -210,25 +209,6 @@ static void wdm_in_callback(struct urb *urb)
>                         desc->reslength = length;
>                 }
>         }
> -
> -       /*
> -        * Handling devices with the WDM_DRAIN_ON_OPEN flag set:
> -        * If desc->resp_count is unset, then the urb was submitted
> -        * without a prior notification.  If the device returned any
> -        * data, then this implies that it had messages queued without
> -        * notifying us.  Continue reading until that queue is flushed.
> -        */
> -       if (!desc->resp_count) {
> -               if (!length) {
> -                       /* do not propagate the expected -EPIPE */
> -                       desc->rerr = 0;
> -                       goto unlock;
> -               }
> -               dev_dbg(&desc->intf->dev, "got %d bytes without notification\n", length);
> -               set_bit(WDM_RESPONDING, &desc->flags);
> -               usb_submit_urb(desc->response, GFP_ATOMIC);
> -       }
> -
>  skip_error:
>         set_bit(WDM_READ, &desc->flags);
>         wake_up(&desc->wait);
> @@ -243,7 +223,6 @@ static void wdm_in_callback(struct urb *urb)
>                 service_outstanding_interrupt(desc);
>         }
>
> -unlock:
>         spin_unlock(&desc->iuspin);
>  }
>
> @@ -686,17 +665,6 @@ static int wdm_open(struct inode *inode, struct file *file)
>                         dev_err(&desc->intf->dev,
>                                 "Error submitting int urb - %d\n", rv);
>                         rv = usb_translate_errors(rv);
> -               } else if (test_bit(WDM_DRAIN_ON_OPEN, &desc->flags)) {
> -                       /*
> -                        * Some devices keep pending messages queued
> -                        * without resending notifications.  We must
> -                        * flush the message queue before we can
> -                        * assume a one-to-one relationship between
> -                        * notifications and messages in the queue
> -                        */
> -                       dev_dbg(&desc->intf->dev, "draining queued data\n");
> -                       set_bit(WDM_RESPONDING, &desc->flags);
> -                       rv = usb_submit_urb(desc->response, GFP_KERNEL);
>                 }
>         } else {
>                 rv = 0;
> @@ -803,8 +771,7 @@ static void wdm_rxwork(struct work_struct *work)
>  /* --- hotplug --- */
>
>  static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor *ep,
> -               u16 bufsize, int (*manage_power)(struct usb_interface *, int),
> -               bool drain_on_open)
> +               u16 bufsize, int (*manage_power)(struct usb_interface *, int))
>  {
>         int rv = -ENOMEM;
>         struct wdm_device *desc;
> @@ -891,68 +858,6 @@ static int wdm_create(struct usb_interface *intf, struct usb_endpoint_descriptor
>
>         desc->manage_power = manage_power;
>
> -       /*
> -        * "drain_on_open" enables a hack to work around a firmware
> -        * issue observed on network functions, in particular MBIM
> -        * functions.
> -        *
> -        * Quoting section 7 of the CDC-WMC r1.1 specification:
> -        *
> -        *  "The firmware shall interpret GetEncapsulatedResponse as a
> -        *   request to read response bytes. The firmware shall send
> -        *   the next wLength bytes from the response. The firmware
> -        *   shall allow the host to retrieve data using any number of
> -        *   GetEncapsulatedResponse requests. The firmware shall
> -        *   return a zero- length reply if there are no data bytes
> -        *   available.
> -        *
> -        *   The firmware shall send ResponseAvailable notifications
> -        *   periodically, using any appropriate algorithm, to inform
> -        *   the host that there is data available in the reply
> -        *   buffer. The firmware is allowed to send ResponseAvailable
> -        *   notifications even if there is no data available, but
> -        *   this will obviously reduce overall performance."
> -        *
> -        * These requirements, although they make equally sense, are
> -        * often not implemented by network functions. Some firmwares
> -        * will queue data indefinitely, without ever resending a
> -        * notification. The result is that the driver and firmware
> -        * loses "syncronization" if the driver ever fails to respond
> -        * to a single notification, something which easily can happen
> -        * on release(). When this happens, the driver will appear to
> -        * never receive notifications for the most current data. Each
> -        * notification will only cause a single read, which returns
> -        * the oldest data in the firmware's queue.
> -        *
> -        * The "drain_on_open" hack resolves the situation by draining
> -        * data from the firmware until none is returned, without a
> -        * prior notification.
> -        *
> -        * This will inevitably race with the firmware, risking that
> -        * we read data from the device before handling the associated
> -        * notification. To make things worse, some of the devices
> -        * needing the hack do not implement the "return zero if no
> -        * data is available" requirement either. Instead they return
> -        * an error on the subsequent read in this case.  This means
> -        * that "winning" the race can cause an unexpected EIO to
> -        * userspace.
> -        *
> -        * "winning" the race is more likely on resume() than on
> -        * open(), and the unexpected error is more harmful in the
> -        * middle of an open session. The hack is therefore only
> -        * applied on open(), and not on resume() where it logically
> -        * would be equally necessary. So we define open() as the only
> -        * driver <-> device "syncronization point".  Should we happen
> -        * to lose a notification after open(), then syncronization
> -        * will be lost until release()
> -        *
> -        * The hack should not be enabled for CDC WDM devices
> -        * conforming to the CDC-WMC r1.1 specification.  This is
> -        * ensured by setting drain_on_open to false in wdm_probe().
> -        */
> -       if (drain_on_open)
> -               set_bit(WDM_DRAIN_ON_OPEN, &desc->flags);
> -
>         spin_lock(&wdm_device_list_lock);
>         list_add(&desc->device_list, &wdm_device_list);
>         spin_unlock(&wdm_device_list_lock);
> @@ -1006,7 +911,7 @@ static int wdm_probe(struct usb_interface *intf, const struct usb_device_id *id)
>                 goto err;
>         ep = &iface->endpoint[0].desc;
>
> -       rv = wdm_create(intf, ep, maxcom, &wdm_manage_power, false);
> +       rv = wdm_create(intf, ep, maxcom, &wdm_manage_power);
>
>  err:
>         return rv;
> @@ -1038,7 +943,7 @@ struct usb_driver *usb_cdc_wdm_register(struct usb_interface *intf,
>  {
>         int rv = -EINVAL;
>
> -       rv = wdm_create(intf, ep, bufsize, manage_power, true);
> +       rv = wdm_create(intf, ep, bufsize, manage_power);
>         if (rv < 0)
>                 goto err;
>
> --
> 2.11.0
>