Re: [spice v13 11/29] server: Avoid copying the input frame in the GStreamer encoder

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Apr 19, 2016 at 09:49:44AM +0200, Francois Gouget wrote:
> Note that we can only avoid copies for the first 1 Mpixels or so.
> That's because Spice splits larger frames into more chunks than we can
> fit GstMemory fragments in a GStreamer buffer. So if there are more
> pixels we will avoid copies for the first 3840 KB and copy the rest.
> Furthermore, while in practice the GStreamer encoder will only modify
> the RedDrawable refcount during the encode_frame(), in theory the
> refcount could be decremented from the GStreamer thread after
> encode_frame() returns.
> 
> Signed-off-by: Francois Gouget <fgouget@xxxxxxxxxxxxxxx>
> ---
>  server/dcc-send.c          |  13 +--
>  server/gstreamer-encoder.c | 199 +++++++++++++++++++++++++++++++++++++++++----
>  server/mjpeg-encoder.c     |   5 +-
>  server/stream.c            |  16 +++-
>  server/video-encoder.h     |  25 +++++-
>  5 files changed, 231 insertions(+), 27 deletions(-)
> 
> diff --git a/server/dcc-send.c b/server/dcc-send.c
> index a2aea2a..68b026e 100644
> --- a/server/dcc-send.c
> +++ b/server/dcc-send.c
> @@ -1656,12 +1656,12 @@ static void red_release_video_encoder_buffer(uint8_t *data, void *opaque)
>  }
>  
>  static int red_marshall_stream_data(RedChannelClient *rcc,
> -                                    SpiceMarshaller *base_marshaller, Drawable *drawable)
> +                                    SpiceMarshaller *base_marshaller,
> +                                    Drawable *drawable)
>  {
>      DisplayChannelClient *dcc = RCC_TO_DCC(rcc);
>      DisplayChannel *display = DCC_TO_DC(dcc);
>      Stream *stream = drawable->stream;
> -    SpiceImage *image;
>      uint32_t frame_mm_time;
>      int width, height;
>      int ret;
> @@ -1670,10 +1670,10 @@ static int red_marshall_stream_data(RedChannelClient *rcc,
>          spice_assert(drawable->sized_stream);
>          stream = drawable->sized_stream;
>      }
> -    spice_assert(drawable->red_drawable->type == QXL_DRAW_COPY);
> -
> -    image = drawable->red_drawable->u.copy.src_bitmap;
> +    RedDrawable *red_drawable = drawable->red_drawable;
> +    spice_assert(red_drawable->type == QXL_DRAW_COPY);
>  
> +    SpiceImage *image = red_drawable->u.copy.src_bitmap;
>      if (image->descriptor.type != SPICE_IMAGE_TYPE_BITMAP) {
>          return FALSE;
>      }
> @@ -1715,7 +1715,8 @@ static int red_marshall_stream_data(RedChannelClient *rcc,
>                                               frame_mm_time,
>                                               &image->u.bitmap, width, height,
>                                               &drawable->red_drawable->u.copy.src_area,
> -                                             stream->top_down, &outbuf);
> +                                             stream->top_down, red_drawable,
> +                                             &outbuf);
>      switch (ret) {
>      case VIDEO_ENCODER_FRAME_DROP:
>          spice_assert(dcc->use_video_encoder_rate_control);
> diff --git a/server/gstreamer-encoder.c b/server/gstreamer-encoder.c
> index f9b3579..7929dd8 100644
> --- a/server/gstreamer-encoder.c
> +++ b/server/gstreamer-encoder.c
> @@ -30,6 +30,8 @@
>  
>  #define SPICE_GST_DEFAULT_FPS 30
>  
> +#define DO_ZERO_COPY
> +

Why do we want to  make it possible to compile out? I believe the
implementation is more robust now that it initially was? Do you still
expect some people will want to disable it?

>  
>  typedef struct {
>      SpiceBitmapFmt spice_format;
> @@ -46,6 +48,14 @@ typedef struct SpiceGstVideoBuffer {
>  typedef struct SpiceGstEncoder {
>      VideoEncoder base;
>  
> +    /* Callbacks to adjust the refcount of the bitmap being encoded. */
> +    bitmap_ref_t bitmap_ref;
> +    bitmap_unref_t bitmap_unref;
> +
> +#ifdef DO_ZERO_COPY
> +    GAsyncQueue *unused_bitmap_opaques;
> +#endif
> +
>      /* Rate control callbacks */
>      VideoEncoderRateControlCbs cbs;
>  
> @@ -483,12 +493,109 @@ static inline int line_copy(SpiceGstEncoder *encoder, const SpiceBitmap *bitmap,
>       return TRUE;
>  }
>  
> +#ifdef DO_ZERO_COPY
> +typedef struct {
> +    gint refs;
> +    SpiceGstEncoder *encoder;
> +    gpointer opaque;
> +} BitmapWrapper;
> +
> +static void clear_zero_copy_queue(SpiceGstEncoder *encoder, gboolean unref_queue)
> +{
> +    gpointer bitmap_opaque;
> +    while ((bitmap_opaque = g_async_queue_try_pop(encoder->unused_bitmap_opaques))) {
> +        encoder->bitmap_unref(bitmap_opaque);
> +    }
> +    if (unref_queue) {
> +        g_async_queue_unref(encoder->unused_bitmap_opaques);
> +    }
> +}
> +
> +static BitmapWrapper *bitmap_wrapper_new(SpiceGstEncoder *encoder, gpointer bitmap_opaque)
> +{
> +    BitmapWrapper *wrapper = spice_new(BitmapWrapper, 1);
> +    wrapper->refs = 1;
> +    wrapper->encoder = encoder;
> +    wrapper->opaque = bitmap_opaque;
> +    encoder->bitmap_ref(bitmap_opaque);
> +    return wrapper;
> +}
> +
> +static void bitmap_wrapper_unref(gpointer data)
> +{
> +    BitmapWrapper *wrapper = data;
> +    if (g_atomic_int_dec_and_test(&wrapper->refs)) {
> +        g_async_queue_push(wrapper->encoder->unused_bitmap_opaques, wrapper->opaque);
> +        free(wrapper);
> +    }
> +}
> +
> +
> +/* A helper for push_raw_frame() */
> +static inline int zero_copy(SpiceGstEncoder *encoder,
> +                            const SpiceBitmap *bitmap, gpointer bitmap_opaque,
> +                            GstBuffer *buffer, uint32_t *chunk_index,
> +                            uint32_t *chunk_offset, uint32_t *len)
> +{
> +    const SpiceChunks *chunks = bitmap->data;
> +    while (*chunk_index < chunks->num_chunks &&
> +           *chunk_offset >= chunks->chunk[*chunk_index].len) {
> +        if (is_chunk_padded(bitmap, *chunk_index)) {
> +            return FALSE;
> +        }
> +        *chunk_offset -= chunks->chunk[*chunk_index].len;
> +        (*chunk_index)++;
> +    }
> +
> +    int max_mem = gst_buffer_get_max_memory();


The gstreamer name isn't so great, but could we use something less
confusing than 'max_mem' as the variable name? I keep thinking it's a
size in bytes. 'max_block_count' maybe?

> +    if (chunks->num_chunks - *chunk_index > max_mem) {
> +        /* There are more chunks than we can fit memory objects in a
> +         * buffer. This will cause the buffer to merge memory objects to
> +         * fit the extra chunks, which means doing wasteful memory copies.
> +         * So use the zero-copy approach for the first max_mem-1 chunks, and
> +         * let push_raw_frame() deal with the rest.
> +         */
> +        max_mem = *chunk_index + max_mem - 1;
> +    } else {
> +        max_mem = chunks->num_chunks;
> +    }
> +
> +    BitmapWrapper *wrapper = NULL;
> +    while (*len && *chunk_index < max_mem) {
> +        if (is_chunk_padded(bitmap, *chunk_index)) {
> +            return FALSE;
> +        }
> +        if (wrapper) {
> +            wrapper->refs++;

It would probably be safer to only change this with g_atomic_int_inc()

Apart from these minor comments, this looks good to me.

Christophe

Attachment: signature.asc
Description: PGP signature

_______________________________________________
Spice-devel mailing list
Spice-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/spice-devel

[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]     [Monitors]