Hi Keiichi, On 24/07/18 05:18, Keiichi Watanabe wrote: > Hi Kieran, > > How is it going? > I would appreciate if you could review this patch. Thanks for the ping. ... This was on my todo list ... but just ... sooo many Todo's :D Ping bumped you to the top :) And done. Now to ping Laurent about my async patches :) <ping> -- Regards Kieran > Best regards, > Keiichi > > On Thu, Jun 28, 2018 at 2:21 AM, Kieran Bingham > <kieran.bingham@xxxxxxxxxxxxxxxx> wrote: >> Hi Keiichi, >> >> I just wanted to reply here quickly to say this is a really interesting >> development. >> >> I would like to review and test it properly; however I am now on holiday >> until at least the 9th July and so I won't be able to look until after >> that date. >> >> I didn't want you to be left hanging though :-) >> >> Ping me if you haven't heard anything by the 20th or so. Of course if >> anyone else gets round to testing and reviewing then that's great too. >> >> Regards >> -- >> Kieran >> >> >> >> On 27/06/18 11:34, Keiichi Watanabe wrote: >>> On some platforms with non-coherent DMA (e.g. ARM), USB drivers use >>> uncached memory allocation methods. In such situations, it sometimes >>> takes a long time to access URB buffers. This can be a cause of video >>> flickering problems if a resolution is high and a USB controller has >>> a very tight time limit. (e.g. dwc2) To avoid this problem, we copy >>> header data from (uncached) URB buffer into (cached) local buffer. >>> >>> This change should make the elapsed time of the interrupt handler >>> shorter on platforms with non-coherent DMA. We measured the elapsed >>> time of each callback of uvc_video_complete without/with this patch >>> while capturing Full HD video in >>> https://webrtc.github.io/samples/src/content/getusermedia/resolution/. >>> I tested it on the top of Kieran Bingham's Asynchronous UVC series >>> https://www.mail-archive.com/linux-media@xxxxxxxxxxxxxxx/msg128359.html. >>> The test device was Jerry Chromebook (RK3288) with Logitech Brio 4K. >>> I collected data for 5 seconds. (There were around 480 callbacks in >>> this case.) The following result shows that this patch makes >>> uvc_video_complete about 2x faster. >>> >>> | average | median | min | max | standard deviation >>> w/o caching| 45319ns | 40250ns | 33834ns | 142625ns| 16611ns >>> w/ caching| 20620ns | 19250ns | 12250ns | 56583ns | 6285ns >>> >>> In addition, we confirmed that this patch doesn't make it worse on >>> coherent DMA architecture by performing the same measurements on a >>> Broadwell Chromebox with the same camera. >>> >>> | average | median | min | max | standard deviation >>> w/o caching| 21026ns | 21424ns | 12263ns | 23956ns | 1932ns >>> w/ caching| 20728ns | 20398ns | 8922ns | 45120ns | 3368ns >>> >>> Signed-off-by: Keiichi Watanabe <keiichiw@xxxxxxxxxxxx> >>> --- >>> >>> After applying 6 patches in >>> https://www.mail-archive.com/linux-media@xxxxxxxxxxxxxxx/msg128359.html, >>> I measured elapsed time by adding the following code to >>> /drivers/media/usb/uvc/uvc_video.c >>> >>> @@ -XXXX,6 +XXXX,9 @@ static void uvc_video_complete(struct urb *urb) >>> struct uvc_video_queue *queue = &stream->queue; >>> struct uvc_buffer *buf = NULL; >>> int ret; >>> + ktime_t start, end; >>> + int elapsed_time; >>> + start = ktime_get(); >>> switch (urb->status) { >>> case 0: >>> >>> @@ -XXXX,6 +XXXX,10 @@ static void uvc_video_complete(struct urb *urb) >>> >>> INIT_WORK(&uvc_urb->work, uvc_video_copy_data_work); >>> queue_work(stream->async_wq, &uvc_urb->work); >>> + >>> + end = ktime_get(); >>> + elapsed_time = ktime_to_ns(ktime_sub(end, start)); >>> + pr_err("elapsed time: %d ns", elapsed_time); >>> } >>> >>> /* >>> >>> >>> drivers/media/usb/uvc/uvc_video.c | 92 +++++++++++++++---------------- >>> 1 file changed, 43 insertions(+), 49 deletions(-) >>> diff --git a/drivers/media/usb/uvc/uvc_video.c b/drivers/media/usb/uvc/uvc_video.c >>> index a88b2e51a666..ff2eddc55530 100644 >>> --- a/drivers/media/usb/uvc/uvc_video.c >>> +++ b/drivers/media/usb/uvc/uvc_video.c >>> @@ -391,36 +391,15 @@ static inline ktime_t uvc_video_get_time(void) >>> >>> static void >>> uvc_video_clock_decode(struct uvc_streaming *stream, struct uvc_buffer *buf, >>> - const u8 *data, int len) >>> + const u8 *data, int len, unsigned int header_size, >>> + bool has_pts, bool has_scr) >>> { >>> struct uvc_clock_sample *sample; >>> - unsigned int header_size; >>> - bool has_pts = false; >>> - bool has_scr = false; >>> unsigned long flags; >>> ktime_t time; >>> u16 host_sof; >>> u16 dev_sof; >>> >>> - switch (data[1] & (UVC_STREAM_PTS | UVC_STREAM_SCR)) { >>> - case UVC_STREAM_PTS | UVC_STREAM_SCR: >>> - header_size = 12; >>> - has_pts = true; >>> - has_scr = true; >>> - break; >>> - case UVC_STREAM_PTS: >>> - header_size = 6; >>> - has_pts = true; >>> - break; >>> - case UVC_STREAM_SCR: >>> - header_size = 8; >>> - has_scr = true; >>> - break; >>> - default: >>> - header_size = 2; >>> - break; >>> - } >>> - >>> /* Check for invalid headers. */ >>> if (len < header_size) >>> return; >>> @@ -717,11 +696,10 @@ void uvc_video_clock_update(struct uvc_streaming *stream, >>> */ >>> >>> static void uvc_video_stats_decode(struct uvc_streaming *stream, >>> - const u8 *data, int len) >>> + const u8 *data, int len, >>> + unsigned int header_size, bool has_pts, >>> + bool has_scr) >>> { >>> - unsigned int header_size; >>> - bool has_pts = false; >>> - bool has_scr = false; >>> u16 uninitialized_var(scr_sof); >>> u32 uninitialized_var(scr_stc); >>> u32 uninitialized_var(pts); >>> @@ -730,25 +708,6 @@ static void uvc_video_stats_decode(struct uvc_streaming *stream, >>> stream->stats.frame.nb_packets == 0) >>> stream->stats.stream.start_ts = ktime_get(); >>> >>> - switch (data[1] & (UVC_STREAM_PTS | UVC_STREAM_SCR)) { >>> - case UVC_STREAM_PTS | UVC_STREAM_SCR: >>> - header_size = 12; >>> - has_pts = true; >>> - has_scr = true; >>> - break; >>> - case UVC_STREAM_PTS: >>> - header_size = 6; >>> - has_pts = true; >>> - break; >>> - case UVC_STREAM_SCR: >>> - header_size = 8; >>> - has_scr = true; >>> - break; >>> - default: >>> - header_size = 2; >>> - break; >>> - } >>> - >>> /* Check for invalid headers. */ >>> if (len < header_size || data[0] < header_size) { >>> stream->stats.frame.nb_invalid++; >>> @@ -957,10 +916,41 @@ static void uvc_video_stats_stop(struct uvc_streaming *stream) >>> * to be called with a NULL buf parameter. uvc_video_decode_data and >>> * uvc_video_decode_end will never be called with a NULL buffer. >>> */ >>> +static void uvc_video_decode_header_size(const u8 *data, int *header_size, >>> + bool *has_pts, bool *has_scr) >>> +{ >>> + switch (data[1] & (UVC_STREAM_PTS | UVC_STREAM_SCR)) { >>> + case UVC_STREAM_PTS | UVC_STREAM_SCR: >>> + *header_size = 12; >>> + *has_pts = true; >>> + *has_scr = true; >>> + break; >>> + case UVC_STREAM_PTS: >>> + *header_size = 6; >>> + *has_pts = true; >>> + break; >>> + case UVC_STREAM_SCR: >>> + *header_size = 8; >>> + *has_scr = true; >>> + break; >>> + default: >>> + *header_size = 2; >>> + } >>> +} >>> + >>> static int uvc_video_decode_start(struct uvc_streaming *stream, >>> - struct uvc_buffer *buf, const u8 *data, int len) >>> + struct uvc_buffer *buf, const u8 *urb_data, >>> + int len) >>> { >>> u8 fid; >>> + u8 data[12]; >>> + unsigned int header_size; >>> + bool has_pts = false, has_scr = false; >>> + >>> + /* Cache the header since urb_data is uncached memory. The size of >>> + * header is at most 12 bytes. >>> + */ >>> + memcpy(data, urb_data, min(len, 12)); >>> >>> /* Sanity checks: >>> * - packet must be at least 2 bytes long >>> @@ -983,8 +973,12 @@ static int uvc_video_decode_start(struct uvc_streaming *stream, >>> uvc_video_stats_update(stream); >>> } >>> >>> - uvc_video_clock_decode(stream, buf, data, len); >>> - uvc_video_stats_decode(stream, data, len); >>> + uvc_video_decode_header_size(data, &header_size, &has_pts, &has_scr); >>> + >>> + uvc_video_clock_decode(stream, buf, data, len, header_size, has_pts, >>> + has_scr); >>> + uvc_video_stats_decode(stream, data, len, header_size, has_pts, >>> + has_scr); >>> >>> /* Store the payload FID bit and return immediately when the buffer is >>> * NULL. >>> -- >>> 2.18.0.rc2.346.g013aa6912e-goog >>> >> >> -- >> Regards >> -- >> Kieran -- Regards -- Kieran