On 2/15/19 2:05 PM, Dafna Hirschfeld wrote: > In the decoder, save the inner reference frame in the same > format as the capture buffer. > The decoder writes directly to the capture buffer and then > the capture buffer is copied to the reference buffer. > This will simplify the stateless decoder. > > Signed-off-by: Dafna Hirschfeld <dafna3@xxxxxxxxx> > --- > drivers/media/platform/vicodec/codec-fwht.c | 68 +++-- > drivers/media/platform/vicodec/codec-fwht.h | 10 +- > .../media/platform/vicodec/codec-v4l2-fwht.c | 280 +++--------------- > .../media/platform/vicodec/codec-v4l2-fwht.h | 3 + > drivers/media/platform/vicodec/vicodec-core.c | 2 + > 5 files changed, 103 insertions(+), 260 deletions(-) > > diff --git a/drivers/media/platform/vicodec/codec-fwht.c b/drivers/media/platform/vicodec/codec-fwht.c > index d1d6085da9f1..42849476069b 100644 > --- a/drivers/media/platform/vicodec/codec-fwht.c > +++ b/drivers/media/platform/vicodec/codec-fwht.c > @@ -632,12 +632,13 @@ static int decide_blocktype(const u8 *cur, const u8 *reference, > return vari <= vard ? IBLOCK : PBLOCK; > } > > -static void fill_decoder_block(u8 *dst, const s16 *input, int stride) > +static void fill_decoder_block(u8 *dst, const s16 *input, int stride, > + unsigned int dst_step) > { > int i, j; > > for (i = 0; i < 8; i++) { > - for (j = 0; j < 8; j++, input++, dst++) { > + for (j = 0; j < 8; j++, input++, dst += dst_step) { > if (*input < 0) > *dst = 0; > else if (*input > 255) > @@ -645,17 +646,19 @@ static void fill_decoder_block(u8 *dst, const s16 *input, int stride) > else > *dst = *input; > } > - dst += stride - 8; > + dst += stride - (8 * dst_step); > } > } > > -static void add_deltas(s16 *deltas, const u8 *ref, int stride) > +static void add_deltas(s16 *deltas, const u8 *ref, int stride, > + unsigned int ref_step) > { > int k, l; > > for (k = 0; k < 8; k++) { > for (l = 0; l < 8; l++) { > - *deltas += *ref++; > + *deltas += *ref; > + ref += ref_step; > /* > * Due to quantizing, it might possible that the > * decoded coefficients are slightly out of range > @@ -666,7 +669,7 @@ static void add_deltas(s16 *deltas, const u8 *ref, int stride) > *deltas = 255; > deltas++; > } > - ref += stride - 8; > + ref += stride - (8 * ref_step); > } > } > > @@ -711,8 +714,8 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max, > ifwht(cf->de_coeffs, cf->de_fwht, blocktype); > > if (blocktype == PBLOCK) > - add_deltas(cf->de_fwht, refp, 8); > - fill_decoder_block(refp, cf->de_fwht, 8); > + add_deltas(cf->de_fwht, refp, 8, 1); > + fill_decoder_block(refp, cf->de_fwht, 8, 1); > } > > input += 8 * input_step; > @@ -821,8 +824,10 @@ u32 fwht_encode_frame(struct fwht_raw_frame *frm, > return encoding; > } > > -static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref, > - u32 height, u32 width, u32 coded_width, > +static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, > + const u8 *ref, u32 height, u32 width, u32 coded_width, > + u8 *dst, unsigned int dst_stride, > + unsigned int dst_step, unsigned int ref_step, > bool uncompressed, const __be16 *end_of_rlco_buf) > { > unsigned int copies = 0; > @@ -834,10 +839,15 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref, > height = round_up(height, 8); > > if (uncompressed) { > + int i; > + > if (end_of_rlco_buf + 1 < *rlco + width * height / 2) > return false; > - memcpy(ref, *rlco, width * height); > - *rlco += width * height / 2; > + for (i = 0; i < height; i++) { > + memcpy(dst, *rlco, width); > + dst += dst_stride; > + *rlco += width / 2; > + } > return true; > } > > @@ -849,15 +859,18 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref, > */ > for (j = 0; j < height / 8; j++) { > for (i = 0; i < width / 8; i++) { > - u8 *refp = ref + j * 8 * coded_width + i * 8; > + const u8 *refp = ref + j * 8 * ref_step * coded_width + > + i * 8 * ref_step; > + u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step; > > if (copies) { > memcpy(cf->de_fwht, copy, sizeof(copy)); > if (stat & PFRAME_BIT) > add_deltas(cf->de_fwht, refp, > - coded_width); > - fill_decoder_block(refp, cf->de_fwht, > - coded_width); > + coded_width * ref_step, > + ref_step); > + fill_decoder_block(dstp, cf->de_fwht, > + dst_stride, dst_step); > copies--; > continue; > } > @@ -877,23 +890,28 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref, > if (copies) > memcpy(copy, cf->de_fwht, sizeof(copy)); > if (stat & PFRAME_BIT) > - add_deltas(cf->de_fwht, refp, coded_width); > - fill_decoder_block(refp, cf->de_fwht, coded_width); > + add_deltas(cf->de_fwht, refp, > + coded_width * ref_step, ref_step); > + fill_decoder_block(dstp, cf->de_fwht, dst_stride, > + dst_step); > } > } > return true; > } > > -bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref, > +bool fwht_decode_frame(struct fwht_cframe *cf, const struct fwht_raw_frame *ref, > u32 hdr_flags, unsigned int components_num, > unsigned int width, unsigned int height, > - unsigned int coded_width) > + unsigned int coded_width, struct fwht_raw_frame *dst, > + unsigned int dst_stride, unsigned int dst_chroma_stride) > { > const __be16 *rlco = cf->rlc_data; > const __be16 *end_of_rlco_buf = cf->rlc_data + > (cf->size / sizeof(*rlco)) - 1; > > if (!decode_plane(cf, &rlco, ref->luma, height, width, coded_width, > + dst->luma, dst_stride, dst->luma_alpha_step, > + ref->luma_alpha_step, > hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED, > end_of_rlco_buf)) > return false; > @@ -909,11 +927,15 @@ bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref, > w /= 2; > c /= 2; > } > - if (!decode_plane(cf, &rlco, ref->cb, h, w, c, > + if (!decode_plane(cf, &rlco, ref->cb, h, w, c, dst->cb, > + dst_chroma_stride, dst->chroma_step, > + ref->chroma_step, > hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED, > end_of_rlco_buf)) > return false; > - if (!decode_plane(cf, &rlco, ref->cr, h, w, c, > + if (!decode_plane(cf, &rlco, ref->cr, h, w, c, dst->cr, > + dst_chroma_stride, dst->chroma_step, > + ref->chroma_step, > hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED, > end_of_rlco_buf)) > return false; > @@ -922,6 +944,8 @@ bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref, > if (components_num == 4) > if (!decode_plane(cf, &rlco, ref->alpha, height, width, > coded_width, > + dst->alpha, dst_stride, dst->luma_alpha_step, > + ref->luma_alpha_step, > hdr_flags & FWHT_FL_ALPHA_IS_UNCOMPRESSED, > end_of_rlco_buf)) > return false; > diff --git a/drivers/media/platform/vicodec/codec-fwht.h b/drivers/media/platform/vicodec/codec-fwht.h > index 8a4f07d466cb..eab4a97aa132 100644 > --- a/drivers/media/platform/vicodec/codec-fwht.h > +++ b/drivers/media/platform/vicodec/codec-fwht.h > @@ -140,9 +140,9 @@ u32 fwht_encode_frame(struct fwht_raw_frame *frm, > bool is_intra, bool next_is_intra, > unsigned int width, unsigned int height, > unsigned int stride, unsigned int chroma_stride); > -bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref, > - u32 hdr_flags, unsigned int components_num, > - unsigned int width, unsigned int height, > - unsigned int coded_width); > - > +bool fwht_decode_frame(struct fwht_cframe *cf, const struct fwht_raw_frame *ref, > + u32 hdr_flags, unsigned int components_num, > + unsigned int width, unsigned int height, > + unsigned int coded_width, struct fwht_raw_frame *dst, > + unsigned int dst_stride, unsigned int dst_chroma_stride); > #endif > diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.c b/drivers/media/platform/vicodec/codec-v4l2-fwht.c > index 728ed5012aed..40b1f4901fd3 100644 > --- a/drivers/media/platform/vicodec/codec-v4l2-fwht.c > +++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.c > @@ -75,6 +75,35 @@ const struct v4l2_fwht_pixfmt_info *v4l2_fwht_get_pixfmt(u32 idx) > return v4l2_fwht_pixfmts + idx; > } > > +void copy_cap_to_ref(u8 *cap, const struct v4l2_fwht_pixfmt_info *info, > + struct v4l2_fwht_state *state) Should be a static function and cap should be const u8 *. > +{ > + int plane_idx; > + u8 *p_ref = state->ref_frame.buf; > + > + for (plane_idx = 0; plane_idx < info->planes_num; plane_idx++) { > + int i; > + bool is_chroma_plane = plane_idx == 1 || plane_idx == 2; > + unsigned int h_div = is_chroma_plane ? info->height_div : 1; > + unsigned int w_div = is_chroma_plane ? info->width_div : 1; > + unsigned int step = is_chroma_plane ? info->chroma_step : > + info->luma_alpha_step; > + unsigned int stride_div = > + (info->planes_num == 3 && plane_idx > 0) ? 2 : 1; > + > + u8 *row_dst = cap; I'd call this row_cap. 'dst' is confusing since you expect a memcpy to write to the destination address, but this isn't the 'destination', it is just a row in the capture buffer. It really confused me for a bit :-) Regards, Hans > + u8 *row_ref = p_ref; > + > + for (i = 0; i < state->visible_height / h_div; i++) { > + memcpy(row_ref, row_dst, step * state->visible_width / w_div); > + row_ref += step * state->coded_width / w_div; > + row_dst += state->stride / stride_div; > + } > + cap += (state->stride / stride_div) * (state->coded_height / h_div); > + p_ref += (step * state->coded_width / w_div) * (state->coded_height / h_div); > + } > +} > + > static int prepare_raw_frame(struct fwht_raw_frame *rf, > const struct v4l2_fwht_pixfmt_info *info, u8 *buf, > unsigned int size) > @@ -243,14 +272,16 @@ int v4l2_fwht_encode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out) > > int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out) > { > - unsigned int i, j, k; > u32 flags; > struct fwht_cframe cf; > - u8 *p, *ref_p; > unsigned int components_num = 3; > unsigned int version; > const struct v4l2_fwht_pixfmt_info *info; > unsigned int hdr_width_div, hdr_height_div; > + struct fwht_raw_frame dst_rf; > + unsigned int dst_chroma_stride = state->stride; > + unsigned int dst_size = state->stride * state->coded_height; > + unsigned int ref_size; > > if (!state->info) > return -EINVAL; > @@ -298,241 +329,24 @@ int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out) > hdr_height_div != info->height_div) > return -EINVAL; > > - if (!fwht_decode_frame(&cf, &state->ref_frame, flags, components_num, > - state->visible_width, state->visible_height, > - state->coded_width)) > + if (prepare_raw_frame(&dst_rf, info, p_out, dst_size)) > return -EINVAL; > + if (info->id == V4L2_PIX_FMT_YUV420 || > + info->id == V4L2_PIX_FMT_YVU420 || > + info->id == V4L2_PIX_FMT_YUV422P) > + dst_chroma_stride /= 2; > > - /* > - * TODO - handle the case where the compressed stream encodes a > - * different format than the requested decoded format. > - */ > - switch (state->info->id) { > - case V4L2_PIX_FMT_GREY: > - ref_p = state->ref_frame.luma; > - for (i = 0; i < state->coded_height; i++) { > - memcpy(p_out, ref_p, state->visible_width); > - p_out += state->stride; > - ref_p += state->coded_width; > - } > - break; > - case V4L2_PIX_FMT_YUV420: > - case V4L2_PIX_FMT_YUV422P: > - ref_p = state->ref_frame.luma; > - for (i = 0; i < state->coded_height; i++) { > - memcpy(p_out, ref_p, state->visible_width); > - p_out += state->stride; > - ref_p += state->coded_width; > - } > - > - ref_p = state->ref_frame.cb; > - for (i = 0; i < state->coded_height / 2; i++) { > - memcpy(p_out, ref_p, state->visible_width / 2); > - p_out += state->stride / 2; > - ref_p += state->coded_width / 2; > - } > - ref_p = state->ref_frame.cr; > - for (i = 0; i < state->coded_height / 2; i++) { > - memcpy(p_out, ref_p, state->visible_width / 2); > - p_out += state->stride / 2; > - ref_p += state->coded_width / 2; > - } > - break; > - case V4L2_PIX_FMT_YVU420: > - ref_p = state->ref_frame.luma; > - for (i = 0; i < state->coded_height; i++) { > - memcpy(p_out, ref_p, state->visible_width); > - p_out += state->stride; > - ref_p += state->coded_width; > - } > - > - ref_p = state->ref_frame.cr; > - for (i = 0; i < state->coded_height / 2; i++) { > - memcpy(p_out, ref_p, state->visible_width / 2); > - p_out += state->stride / 2; > - ref_p += state->coded_width / 2; > - } > - ref_p = state->ref_frame.cb; > - for (i = 0; i < state->coded_height / 2; i++) { > - memcpy(p_out, ref_p, state->visible_width / 2); > - p_out += state->stride / 2; > - ref_p += state->coded_width / 2; > - } > - break; > - case V4L2_PIX_FMT_NV12: > - case V4L2_PIX_FMT_NV16: > - case V4L2_PIX_FMT_NV24: > - ref_p = state->ref_frame.luma; > - for (i = 0; i < state->coded_height; i++) { > - memcpy(p_out, ref_p, state->visible_width); > - p_out += state->stride; > - ref_p += state->coded_width; > - } > + ref_size = state->coded_width * state->coded_height * > + info->luma_alpha_step; > > - k = 0; > - for (i = 0; i < state->coded_height / 2; i++) { > - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { > - *p++ = state->ref_frame.cb[k]; > - *p++ = state->ref_frame.cr[k]; > - k++; > - } > - p_out += state->stride; > - } > - break; > - case V4L2_PIX_FMT_NV21: > - case V4L2_PIX_FMT_NV61: > - case V4L2_PIX_FMT_NV42: > - ref_p = state->ref_frame.luma; > - for (i = 0; i < state->coded_height; i++) { > - memcpy(p_out, ref_p, state->visible_width); > - p_out += state->stride; > - ref_p += state->coded_width; > - } > + if (prepare_raw_frame(&state->ref_frame, info, state->ref_frame.buf, > + ref_size)) > + return -EINVAL; > > - k = 0; > - for (i = 0; i < state->coded_height / 2; i++) { > - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { > - *p++ = state->ref_frame.cr[k]; > - *p++ = state->ref_frame.cb[k]; > - k++; > - } > - p_out += state->stride; > - } > - break; > - case V4L2_PIX_FMT_YUYV: > - k = 0; > - for (i = 0; i < state->coded_height; i++) { > - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { > - *p++ = state->ref_frame.luma[k]; > - *p++ = state->ref_frame.cb[k / 2]; > - *p++ = state->ref_frame.luma[k + 1]; > - *p++ = state->ref_frame.cr[k / 2]; > - k += 2; > - } > - p_out += state->stride; > - } > - break; > - case V4L2_PIX_FMT_YVYU: > - k = 0; > - for (i = 0; i < state->coded_height; i++) { > - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { > - *p++ = state->ref_frame.luma[k]; > - *p++ = state->ref_frame.cr[k / 2]; > - *p++ = state->ref_frame.luma[k + 1]; > - *p++ = state->ref_frame.cb[k / 2]; > - k += 2; > - } > - p_out += state->stride; > - } > - break; > - case V4L2_PIX_FMT_UYVY: > - k = 0; > - for (i = 0; i < state->coded_height; i++) { > - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { > - *p++ = state->ref_frame.cb[k / 2]; > - *p++ = state->ref_frame.luma[k]; > - *p++ = state->ref_frame.cr[k / 2]; > - *p++ = state->ref_frame.luma[k + 1]; > - k += 2; > - } > - p_out += state->stride; > - } > - break; > - case V4L2_PIX_FMT_VYUY: > - k = 0; > - for (i = 0; i < state->coded_height; i++) { > - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { > - *p++ = state->ref_frame.cr[k / 2]; > - *p++ = state->ref_frame.luma[k]; > - *p++ = state->ref_frame.cb[k / 2]; > - *p++ = state->ref_frame.luma[k + 1]; > - k += 2; > - } > - p_out += state->stride; > - } > - break; > - case V4L2_PIX_FMT_RGB24: > - case V4L2_PIX_FMT_HSV24: > - k = 0; > - for (i = 0; i < state->coded_height; i++) { > - for (j = 0, p = p_out; j < state->coded_width; j++) { > - *p++ = state->ref_frame.cr[k]; > - *p++ = state->ref_frame.luma[k]; > - *p++ = state->ref_frame.cb[k]; > - k++; > - } > - p_out += state->stride; > - } > - break; > - case V4L2_PIX_FMT_BGR24: > - k = 0; > - for (i = 0; i < state->coded_height; i++) { > - for (j = 0, p = p_out; j < state->coded_width; j++) { > - *p++ = state->ref_frame.cb[k]; > - *p++ = state->ref_frame.luma[k]; > - *p++ = state->ref_frame.cr[k]; > - k++; > - } > - p_out += state->stride; > - } > - break; > - case V4L2_PIX_FMT_RGB32: > - case V4L2_PIX_FMT_XRGB32: > - case V4L2_PIX_FMT_HSV32: > - k = 0; > - for (i = 0; i < state->coded_height; i++) { > - for (j = 0, p = p_out; j < state->coded_width; j++) { > - *p++ = 0; > - *p++ = state->ref_frame.cr[k]; > - *p++ = state->ref_frame.luma[k]; > - *p++ = state->ref_frame.cb[k]; > - k++; > - } > - p_out += state->stride; > - } > - break; > - case V4L2_PIX_FMT_BGR32: > - case V4L2_PIX_FMT_XBGR32: > - k = 0; > - for (i = 0; i < state->coded_height; i++) { > - for (j = 0, p = p_out; j < state->coded_width; j++) { > - *p++ = state->ref_frame.cb[k]; > - *p++ = state->ref_frame.luma[k]; > - *p++ = state->ref_frame.cr[k]; > - *p++ = 0; > - k++; > - } > - p_out += state->stride; > - } > - break; > - case V4L2_PIX_FMT_ARGB32: > - k = 0; > - for (i = 0; i < state->coded_height; i++) { > - for (j = 0, p = p_out; j < state->coded_width; j++) { > - *p++ = state->ref_frame.alpha[k]; > - *p++ = state->ref_frame.cr[k]; > - *p++ = state->ref_frame.luma[k]; > - *p++ = state->ref_frame.cb[k]; > - k++; > - } > - p_out += state->stride; > - } > - break; > - case V4L2_PIX_FMT_ABGR32: > - k = 0; > - for (i = 0; i < state->coded_height; i++) { > - for (j = 0, p = p_out; j < state->coded_width; j++) { > - *p++ = state->ref_frame.cb[k]; > - *p++ = state->ref_frame.luma[k]; > - *p++ = state->ref_frame.cr[k]; > - *p++ = state->ref_frame.alpha[k]; > - k++; > - } > - p_out += state->stride; > - } > - break; > - default: > + if (!fwht_decode_frame(&cf, &state->ref_frame, flags, components_num, > + state->visible_width, state->visible_height, > + state->coded_width, &dst_rf, state->stride, > + dst_chroma_stride)) > return -EINVAL; > - } > return 0; > } > diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.h b/drivers/media/platform/vicodec/codec-v4l2-fwht.h > index aa6fa90a48be..75343cdf45e2 100644 > --- a/drivers/media/platform/vicodec/codec-v4l2-fwht.h > +++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.h > @@ -53,6 +53,9 @@ const struct v4l2_fwht_pixfmt_info *v4l2_fwht_default_fmt(u32 width_div, > u32 pixenc, > unsigned int start_idx); > > +void copy_cap_to_ref(u8 *cap, const struct v4l2_fwht_pixfmt_info *info, > + struct v4l2_fwht_state *state); > + > int v4l2_fwht_encode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out); > int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out); > > diff --git a/drivers/media/platform/vicodec/vicodec-core.c b/drivers/media/platform/vicodec/vicodec-core.c > index 8d38bc1ef079..335a931fdf02 100644 > --- a/drivers/media/platform/vicodec/vicodec-core.c > +++ b/drivers/media/platform/vicodec/vicodec-core.c > @@ -194,6 +194,8 @@ static int device_process(struct vicodec_ctx *ctx, > ret = v4l2_fwht_decode(state, p_src, p_dst); > if (ret < 0) > return ret; > + copy_cap_to_ref(p_dst, ctx->state.info, &ctx->state); > + > vb2_set_plane_payload(&dst_vb->vb2_buf, 0, q_dst->sizeimage); > } > >