On Mon, Oct 15, 2018 at 10:38:35AM +0300, Stanislav Lisovskiy wrote: > v2: Renamed DRM_FORMAT_XYUV to DRM_FORMAT_XYUV8888. > Added comment about AYUV byte ordering in Gstreamer. > > v3: Removed sna_composite_op flags related change to the separate patch. > > v4: Fixed review comments, done code refactoring > > v5: Fixed following review comments: > - Fixed comment in shader code for ayuv kernel. > - Fixed naming to VIDEO_AYUV_BT601/BT709 for ayuv kernels. > - Removed duplicate gen9_kernel parameter, left from previous patches > - Added colorspace handling for new AYUV kernel > - Fixed naming of sna_copy_packed_data_ayuv to sna_copy_ayuv_data > - Started using standard bswap_32 function for byte swapping in sna_copy_ayuv_data > - Removed redundant code in sna_copy_ayuv_data so that it looks more neat > - Fixed XVIMAGE_AYUV structure initialization to contain proper byte sequence for GST > - Fixed bogus comment about subsampling for DRM_FORMAT_XYUV8888 > - Fixed AYUV advertisement for all platforms > - Removed unnecessary RGB888 declaration. 
> > Signed-off-by: Stanislav Lisovskiy <stanislav.lisovskiy@xxxxxxxxx> > --- > src/render_program/Makefile.am | 2 + > .../exa_wm_src_sample_argb_ayuv.g8a | 66 ++++++++++ > .../exa_wm_src_sample_argb_ayuv.g8b | 6 + > src/sna/gen9_render.c | 24 +++- > src/sna/sna_render.h | 3 + > src/sna/sna_video.c | 117 ++++++++++++++++++ > src/sna/sna_video.h | 20 +++ > src/sna/sna_video_sprite.c | 22 +++- > src/sna/sna_video_textured.c | 7 ++ > 9 files changed, 262 insertions(+), 5 deletions(-) > create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8a > create mode 100644 src/render_program/exa_wm_src_sample_argb_ayuv.g8b > > diff --git a/src/render_program/Makefile.am b/src/render_program/Makefile.am > index dc58138f..e35ffa52 100644 > --- a/src/render_program/Makefile.am > +++ b/src/render_program/Makefile.am > @@ -196,6 +196,7 @@ INTEL_G7B = \ > INTEL_G8A = \ > exa_wm_src_affine.g8a \ > exa_wm_src_sample_argb.g8a \ > + exa_wm_src_sample_argb_ayuv.g8a \ > exa_wm_src_sample_nv12.g8a \ > exa_wm_src_sample_planar.g8a \ > exa_wm_write.g8a \ > @@ -205,6 +206,7 @@ INTEL_G8A = \ > > INTEL_G8B = \ > exa_wm_src_affine.g8b \ > + exa_wm_src_sample_argb_ayuv.g8b \ > exa_wm_src_sample_argb.g8b \ > exa_wm_src_sample_nv12.g8b \ > exa_wm_src_sample_planar.g8b \ > diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8a b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a > new file mode 100644 > index 00000000..16cd54c6 > --- /dev/null > +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8a > @@ -0,0 +1,66 @@ > +/* > + * Copyright © 2006 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is 
furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > + * IN THE SOFTWARE. > + * > + * Authors: > + * Wang Zhenyu <zhenyu.z.wang@xxxxxxxxx> > + * Keith Packard <keithp@xxxxxxxxxx> > + */ > + > +/* Sample the src surface */ > + > +include(`exa_wm.g4i') > + > +undefine(`src_msg') > +undefine(`src_msg_ind') > + > +define(`src_msg', `g65') > +define(`src_msg_ind', `65') > + > +/* prepare sampler read back gX register, which would be written back to output */ > + > +/* use simd16 sampler, param 0 is u, param 1 is v. 
*/ > +/* 'payload' loading, assuming tex coord start from g4 */ > + > +/* load argb */ > +mov (1) g0.8<1>UD 0x00000000UD { align1 mask_disable }; > +mov (8) src_msg<1>UD g0<8,8,1>UD { align1 }; /* copy to msg start reg*/ > + > +/* src_msg will be copied with g0, as it contains send desc */ > +/* emit sampler 'send' cmd */ > +send (16) src_msg_ind /* msg reg index */ > + src_sample_base<1>UW /* readback */ > + null > + sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) > + /* here(src->dst) we should use src_sampler and src_surface */ > + mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */ > + > + > +/* > + * Have to change bytes order, because the only > + * player which supports AYUV format currently is > + * Gstreamer and it supports in bad way, even though > + * spec says MSB:AYUV, we get the bytes opposite way. > + */ > +mov (16) src_sample_b<1>UD src_sample_r<1>UD { align1 }; > +mov (16) src_sample_r<1>UD src_sample_a<1>UD { align1 }; > +mov (16) src_sample_a<1>F 1.0F; > + > diff --git a/src/render_program/exa_wm_src_sample_argb_ayuv.g8b b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b > new file mode 100644 > index 00000000..4f439141 > --- /dev/null > +++ b/src/render_program/exa_wm_src_sample_argb_ayuv.g8b > @@ -0,0 +1,6 @@ > + { 0x00000001, 0x2008060c, 0x00000000, 0x00000000 }, > + { 0x00600001, 0x28200208, 0x008d0000, 0x00000000 }, > + { 0x02800031, 0x21c00a48, 0x06000820, 0x0a8c0001 }, > + { 0x00800001, 0x22400208, 0x002001c0, 0x00000000 }, > + { 0x00800001, 0x21c00208, 0x00200280, 0x00000000 }, > + { 0x00800001, 0x22803ee8, 0x38000000, 0x3f800000 }, > diff --git a/src/sna/gen9_render.c b/src/sna/gen9_render.c > index eb22b642..c2c45e1e 100644 > --- a/src/sna/gen9_render.c > +++ b/src/sna/gen9_render.c > @@ -129,6 +129,20 @@ static const uint32_t ps_kernel_planar_bt709[][4] = { > #include "exa_wm_write.g8b" > }; > > +static const uint32_t ps_kernel_ayuv_bt601[][4] = { > +#include 
"exa_wm_src_affine.g8b" > +#include "exa_wm_src_sample_argb_ayuv.g8b" > +#include "exa_wm_yuv_rgb_bt601.g8b" > +#include "exa_wm_write.g8b" > +}; > + > +static const uint32_t ps_kernel_ayuv_bt709[][4] = { > +#include "exa_wm_src_affine.g8b" > +#include "exa_wm_src_sample_argb_ayuv.g8b" > +#include "exa_wm_yuv_rgb_bt709.g8b" > +#include "exa_wm_write.g8b" > +}; > + > static const uint32_t ps_kernel_nv12_bt709[][4] = { > #include "exa_wm_src_affine.g8b" > #include "exa_wm_src_sample_nv12.g8b" > @@ -177,6 +191,8 @@ static const struct wm_kernel_info { > KERNEL(VIDEO_PLANAR_BT709, ps_kernel_planar_bt709, 7), > KERNEL(VIDEO_NV12_BT709, ps_kernel_nv12_bt709, 7), > KERNEL(VIDEO_PACKED_BT709, ps_kernel_packed_bt709, 2), > + KERNEL(VIDEO_AYUV_BT601, ps_kernel_ayuv_bt601, 2), > + KERNEL(VIDEO_AYUV_BT709, ps_kernel_ayuv_bt709, 2), > KERNEL(VIDEO_RGB, ps_kernel_rgb, 2), > #endif > }; > @@ -2552,7 +2568,6 @@ gen9_render_composite(struct sna *sna, > tmp->mask.bo != NULL, > tmp->has_component_alpha, > tmp->is_affine); > - > tmp->blt = gen9_render_composite_blt; > tmp->box = gen9_render_composite_box; > tmp->boxes = gen9_render_composite_boxes__blt; > @@ -3853,6 +3868,8 @@ static void gen9_emit_video_state(struct sna *sna, > src_surf_format[0] = SURFACEFORMAT_B8G8R8X8_UNORM; > else if (frame->id == FOURCC_UYVY) > src_surf_format[0] = SURFACEFORMAT_YCRCB_SWAPY; > + else if (is_ayuv_fourcc(frame->id)) > + src_surf_format[0] = SURFACEFORMAT_B8G8R8A8_UNORM; You ignore the alpha in the shader, so might as well use B8G8R8X8_UNORM for this one as well. > else > src_surf_format[0] = SURFACEFORMAT_YCRCB_NORMAL; > > @@ -3903,6 +3920,11 @@ static unsigned select_video_kernel(const struct sna_video *video, > case FOURCC_RGB565: > return GEN9_WM_KERNEL_VIDEO_RGB; > > + case FOURCC_AYUV: > + return video->colorspace ? > + GEN9_WM_KERNEL_VIDEO_AYUV_BT709 : > + GEN9_WM_KERNEL_VIDEO_AYUV_BT601; > + > default: > return video->colorspace ? 
> GEN9_WM_KERNEL_VIDEO_PACKED_BT709 : > diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h > index a4e5b56a..891fc905 100644 > --- a/src/sna/sna_render.h > +++ b/src/sna/sna_render.h > @@ -617,6 +617,9 @@ enum { > GEN9_WM_KERNEL_VIDEO_NV12_BT709, > GEN9_WM_KERNEL_VIDEO_PACKED_BT709, > > + GEN9_WM_KERNEL_VIDEO_AYUV_BT601, > + GEN9_WM_KERNEL_VIDEO_AYUV_BT709, > + > GEN9_WM_KERNEL_VIDEO_RGB, > GEN9_WM_KERNEL_COUNT > }; > diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c > index 55405f81..75bdc7d0 100644 > --- a/src/sna/sna_video.c > +++ b/src/sna/sna_video.c > @@ -59,6 +59,7 @@ > #include "intel_options.h" > > #include <xf86xv.h> > +#include <byteswap.h> > > #ifdef SNA_XVMC > #define _SNA_XVMC_SERVER_ > @@ -281,6 +282,7 @@ sna_video_frame_set_rotation(struct sna_video *video, > } else { > switch (frame->id) { > case FOURCC_RGB888: > + case FOURCC_AYUV: > if (rotation & (RR_Rotate_90 | RR_Rotate_270)) { > frame->pitch[0] = ALIGN((height << 2), align); > frame->size = (int)frame->pitch[0] * width; > @@ -584,6 +586,118 @@ sna_copy_packed_data(struct sna_video *video, > } > } > > +static void > +sna_copy_ayuv_data(struct sna_video *video, > + const struct sna_video_frame *frame, > + const uint8_t *buf, > + uint8_t *dst) > +{ > + int pitch = frame->width << 2; > + const uint8_t *src, *s; > + const uint32_t *src_dw; > + uint32_t *dst_dw = (uint32_t *)dst; > + int x, y, w, h; > + int i, j; > + > + if (video->textured) { > + /* XXX support copying cropped extents */ > + x = y = 0; > + w = frame->width; > + h = frame->height; > + } else { > + x = frame->image.x1; > + y = frame->image.y1; > + w = frame->image.x2 - frame->image.x1; > + h = frame->image.y2 - frame->image.y1; > + } > + > + src = buf + (y * pitch) + (x << 2); > + src_dw = (uint32_t *)src; > + switch (frame->rotation) { > + case RR_Rotate_0: > + for (i = 0; i < h; i++) { > + for (j = 0; j < w; j++) { > + uint32_t reverse_dw, dw = src_dw[i * w + j]; > + if (!video->textured) { > + /* > + * For textured we 
do byte reversing in shader. > + * Have to reverse bytes order, because the only > + * player which supports AYUV format currently is > + * Gstreamer and it supports in bad way, even though > + * spec says MSB:AYUV, we get the bytes opposite way. > + */ > + reverse_dw = bswap_32(dw); > + } > + else > + reverse_dw = dw; I think I said before that it might be better to just byteswap every time. No extra branch in the inner loop that way. It would also limit the "gst is nuts" workaround into one place instead of duplicating it several times. The one downside is that we always need to take this path for AYUV. But we need to do that for the sprite adaptor anyway so I think it would be fine. Incidentally you seem to be missing the pwrite/memcpy path for AYUV in sna_video_copy_data() but you don't check for that. So I think we need something along the lines of - if (frame->rotation == RR_Rotate_0 && !video->tiled) { + if (frame->rotation == RR_Rotate_0 && !video->tiled && !is_ayuv()) { > + dst_dw[i * w + j] = reverse_dw; > + } > + } > + break; > + case RR_Rotate_90: > + for (i = 0; i < h; i++) { > + for (j = 0;j < w; j++) { > + uint32_t reverse_dw, dw = src_dw[i * w + j]; > + if (!video->textured) { > + /* > + * For textured we do byte reversing in shader. > + * Have to reverse bytes order, because the only > + * player which supports AYUV format currently is > + * Gstreamer and it supports in bad way, even though > + * spec says MSB:AYUV, we get the bytes opposite way. > + */ > + reverse_dw = bswap_32(dw); > + } > + else > + reverse_dw = dw; > + dst_dw[(w - j - 1) * h + i] = reverse_dw; > + } > + } > + break; > + case RR_Rotate_180: > + for (i = 0; i < h; i++) { > + for (j = 0; j < w; j++) { > + uint32_t reverse_dw, dw = src_dw[i * w + j]; > + /* Columns in reverse order, rows in reverse order */ > + if (!video->textured) { > + /* > + * For textured we do byte reversing in shader. 
> + * Have to reverse bytes order, because the only > + * player which supports AYUV format currently is > + * Gstreamer and it supports in bad way, even though > + * spec says MSB:AYUV, we get the bytes opposite way. > + */ > + reverse_dw = bswap_32(dw); > + } > + else > + reverse_dw = dw; > + dst_dw[(h - i - 1) * w + w - j - 1] = reverse_dw; > + } > + } > + break; > + case RR_Rotate_270: > + for (i = 0; i < h; i++) { > + for (j = 0; j < w; j++) { > + uint32_t reverse_dw, dw = src_dw[i * w + j];; > + if (!video->textured) { > + /* > + * For textured we do byte reversing in shader. > + * Have to reverse bytes order, because the only > + * player which supports AYUV format currently is > + * Gstreamer and it supports in bad way, even though > + * spec says MSB:AYUV, we get the bytes opposite way. > + */ > + reverse_dw = bswap_32(dw); > + } > + else > + reverse_dw = dw; > + dst_dw[(w - j - 1) * h + i] = reverse_dw; > + } > + } > + break; > + } > +} > + > bool > sna_video_copy_data(struct sna_video *video, > struct sna_video_frame *frame, > @@ -709,6 +823,9 @@ use_gtt: /* copy data, must use GTT so that we keep the overlay uncached */ > sna_copy_nv12_data(video, frame, buf, dst); > else if (is_planar_fourcc(frame->id)) > sna_copy_planar_data(video, frame, buf, dst); > + else if (is_ayuv_fourcc(frame->id)) > + /* Some hardcoding is done in default sna_copy_packed_data, so added a specific function */ sna_copy_packed_data() is meant for packed YUV 4:2:2 formats, so the fact that it can't be used is self-evident. So just drop the comment IMO. > + sna_copy_ayuv_data(video, frame, buf, dst); > else > sna_copy_packed_data(video, frame, buf, dst); > > diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h > index bbd3f0fd..3e675f84 100644 > --- a/src/sna/sna_video.h > +++ b/src/sna/sna_video.h > @@ -39,6 +39,7 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
> #define FOURCC_RGB565 ((16 << 24) + ('B' << 16) + ('G' << 8) + 'R') > #define FOURCC_RGB888 ((24 << 24) + ('B' << 16) + ('G' << 8) + 'R') > #define FOURCC_NV12 (('2' << 24) + ('1' << 16) + ('V' << 8) + 'N') > +#define FOURCC_AYUV (('V' << 24) + ('U' << 16) + ('Y' << 8) + 'A') > > /* > * Below, a dummy picture type that is used in XvPutImage > @@ -79,6 +80,15 @@ THE USE OR OTHER DEALINGS IN THE SOFTWARE. > XvTopToBottom \ > } > > +#define XVIMAGE_AYUV { \ > + FOURCC_AYUV, XvYUV, LSBFirst, \ > + {'A', 'Y', 'U', 'V', 0x00, 0x00, 0x00, 0x10, 0x80, 0x00, 0x00, 0xAA, 0x00, 0x38, 0x9B, 0x71}, \ > + 32, XvPacked, 1, 0, 0, 0, 0, 8, 8, 8, 1, 2, 2, 1, 2, 2, \ The 2s should be 1s. There is no chroma subsampling with this format. > + {'A', 'Y', 'U', 'V', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, \ > + XvTopToBottom \ > +} > + > + > struct sna_video { > struct sna *sna; > > @@ -189,6 +199,16 @@ static inline int is_nv12_fourcc(int id) > } > } > > +static inline int is_ayuv_fourcc(int id) > +{ > + switch (id) { > + case FOURCC_AYUV: > + return 1; > + default: > + return 0; > + } > +} > + > bool > sna_video_clip_helper(struct sna_video *video, > struct sna_video_frame *frame, > diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c > index 8b7ae8ae..ae493daa 100644 > --- a/src/sna/sna_video_sprite.c > +++ b/src/sna/sna_video_sprite.c > @@ -47,7 +47,7 @@ > #define DRM_FORMAT_YUYV fourcc_code('Y', 'U', 'Y', 'V') /* [31:0] Cr0:Y1:Cb0:Y0 8:8:8:8 little endian */ > #define DRM_FORMAT_UYVY fourcc_code('U', 'Y', 'V', 'Y') /* [31:0] Y1:Cr0:Y0:Cb0 8:8:8:8 little endian */ > #define DRM_FORMAT_NV12 fourcc_code('N', 'V', '1', '2') /* 2x2 subsampled Cr:Cb plane */ > - > +#define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] x:Y:U:V 8:8:8:8 little endian */ > #define has_hw_scaling(sna, video) ((sna)->kgem.gen < 071 || \ > (sna)->kgem.gen >= 0110) > > @@ -76,9 +76,11 @@ static XvFormatRec formats[] = { {15}, {16}, {24} 
}; > static const XvImageRec images[] = { XVIMAGE_YUY2, XVIMAGE_UYVY, > XVMC_RGB888 }; > static const XvImageRec images_rgb565[] = { XVIMAGE_YUY2, XVIMAGE_UYVY, > - XVMC_RGB888, XVMC_RGB565 }; > + XVMC_RGB888, XVMC_RGB565, XVIMAGE_AYUV }; > static const XvImageRec images_nv12[] = { XVIMAGE_YUY2, XVIMAGE_UYVY, > - XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565 }; > + XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565, XVIMAGE_AYUV }; These should remain unchanged. > +static const XvImageRec images_xyuv8888[] = { XVIMAGE_YUY2, XVIMAGE_UYVY, > + XVIMAGE_NV12, XVMC_RGB888, XVMC_RGB565, XVIMAGE_AYUV }; I'd call this images_ayuv[] since this is about xvimages and not drm fourccs. And I'd put AYUV before the RGB formats in the list. > static const XvAttributeRec attribs[] = { > { XvSettable | XvGettable, 0, 1, (char *)"XV_COLORSPACE" }, /* BT.601, BT.709 */ > { XvSettable | XvGettable, 0, 0xffffff, (char *)"XV_COLORKEY" }, > @@ -364,6 +366,10 @@ sna_video_sprite_show(struct sna *sna, > case FOURCC_UYVY: > f.pixel_format = DRM_FORMAT_UYVY; > break; > + case FOURCC_AYUV: > + /* i915 doesn't support alpha, so we use XYUV */ > + f.pixel_format = DRM_FORMAT_XYUV8888; > + break; > case FOURCC_YUY2: > default: > f.pixel_format = DRM_FORMAT_YUYV; > @@ -705,7 +711,12 @@ static int sna_video_sprite_query(ddQueryImageAttributes_ARGS) > tmp *= (*h >> 1); > size += tmp; > break; > - > + case FOURCC_AYUV: > + tmp = *w << 2; > + if (pitches) > + pitches[0] = tmp; > + size = *h * tmp; > + break; > default: > *w = (*w + 1) & ~1; > *h = (*h + 1) & ~1; > @@ -811,6 +822,9 @@ void sna_video_sprite_setup(struct sna *sna, ScreenPtr screen) > } else if (sna_has_sprite_format(sna, DRM_FORMAT_RGB565)) { > adaptor->pImages = (XvImageRec *)images_rgb565; > adaptor->nImages = ARRAY_SIZE(images_rgb565); > + } else if (sna_has_sprite_format(sna, DRM_FORMAT_XYUV8888)) { > + adaptor->pImages = (XvImageRec *)images_xyuv8888; > + adaptor->nImages = ARRAY_SIZE(images_xyuv8888); This needs to be checked before 565/nv12. 
Otherwise we'll never pick this one on most SKL+ planes. > } else { > adaptor->pImages = (XvImageRec *)images; > adaptor->nImages = ARRAY_SIZE(images); > diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c > index a784fe2e..46c213ef 100644 > --- a/src/sna/sna_video_textured.c > +++ b/src/sna/sna_video_textured.c > @@ -68,6 +68,7 @@ static const XvImageRec gen4_Images[] = { > XVIMAGE_I420, > XVIMAGE_NV12, > XVIMAGE_UYVY, > + XVIMAGE_AYUV, > XVMC_YUV, > }; > > @@ -337,6 +338,12 @@ sna_video_textured_query(ddQueryImageAttributes_ARGS) > pitches[0] = size; > size *= *h; > break; > + case FOURCC_AYUV: > + size = *w << 2; > + if (pitches) > + pitches[0] = size; > + size *= *h; > + break; > case FOURCC_XVMC: > *h = (*h + 1) & ~1; > size = sizeof(uint32_t); > -- > 2.17.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx -- Ville Syrjälä Intel _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx