On 2/19/19 11:30 AM, Hans Verkuil wrote: > On 2/15/19 2:05 PM, Dafna Hirschfeld wrote: >> In the decoder, save the inner reference frame in the same >> format as the capture buffer. >> The decoder writes directly to the capture buffer and then >> the capture buffer is copied to the reference buffer. >> This will simplify the stateless decoder. >> >> Signed-off-by: Dafna Hirschfeld <dafna3@xxxxxxxxx> >> --- >> drivers/media/platform/vicodec/codec-fwht.c | 68 +++-- >> drivers/media/platform/vicodec/codec-fwht.h | 10 +- >> .../media/platform/vicodec/codec-v4l2-fwht.c | 280 +++--------------- >> .../media/platform/vicodec/codec-v4l2-fwht.h | 3 + >> drivers/media/platform/vicodec/vicodec-core.c | 2 + >> 5 files changed, 103 insertions(+), 260 deletions(-) >> >> diff --git a/drivers/media/platform/vicodec/codec-fwht.c b/drivers/media/platform/vicodec/codec-fwht.c >> index d1d6085da9f1..42849476069b 100644 >> --- a/drivers/media/platform/vicodec/codec-fwht.c >> +++ b/drivers/media/platform/vicodec/codec-fwht.c >> @@ -632,12 +632,13 @@ static int decide_blocktype(const u8 *cur, const u8 *reference, >> return vari <= vard ? IBLOCK : PBLOCK; >> } >> >> -static void fill_decoder_block(u8 *dst, const s16 *input, int stride) >> +static void fill_decoder_block(u8 *dst, const s16 *input, int stride, >> + unsigned int dst_step) >> { >> int i, j; >> >> for (i = 0; i < 8; i++) { >> - for (j = 0; j < 8; j++, input++, dst++) { >> + for (j = 0; j < 8; j++, input++, dst += dst_step) { >> if (*input < 0) >> *dst = 0; >> else if (*input > 255) >> @@ -645,17 +646,19 @@ static void fill_decoder_block(u8 *dst, const s16 *input, int stride) >> else >> *dst = *input; >> } >> - dst += stride - 8; >> + dst += stride - (8 * dst_step); >> } >> } >> >> -static void add_deltas(s16 *deltas, const u8 *ref, int stride) >> +static void add_deltas(s16 *deltas, const u8 *ref, int stride, >> + unsigned int ref_step) >> { >> int k, l; >> >> for (k = 0; k < 8; k++) { >> for (l = 0; l < 8; l++) { >> - *deltas += *ref++; >> + *deltas += *ref; >> + ref += ref_step; >> /* >> * Due to quantizing, it might possible that the >> * decoded coefficients are slightly out of range >> @@ -666,7 +669,7 @@ static void add_deltas(s16 *deltas, const u8 *ref, int stride) >> *deltas = 255; >> deltas++; >> } >> - ref += stride - 8; >> + ref += stride - (8 * ref_step); >> } >> } >> >> @@ -711,8 +714,8 @@ static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max, >> ifwht(cf->de_coeffs, cf->de_fwht, blocktype); >> >> if (blocktype == PBLOCK) >> - add_deltas(cf->de_fwht, refp, 8); >> - fill_decoder_block(refp, cf->de_fwht, 8); >> + add_deltas(cf->de_fwht, refp, 8, 1); >> + fill_decoder_block(refp, cf->de_fwht, 8, 1); >> } >> >> input += 8 * input_step; >> @@ -821,8 +824,10 @@ u32 fwht_encode_frame(struct fwht_raw_frame *frm, >> return encoding; >> } >> >> -static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref, >> - u32 height, u32 width, u32 coded_width, >> +static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, >> + const u8 *ref, u32 height, u32 width, u32 coded_width, >> + u8 *dst, unsigned int dst_stride, >> + unsigned int dst_step, unsigned int ref_step, >> bool uncompressed, const __be16 *end_of_rlco_buf) >> { >> unsigned int copies = 0; >> @@ -834,10 +839,15 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref, >> height = round_up(height, 8); >> >> if (uncompressed) { >> + int i; >> + >> if (end_of_rlco_buf + 1 < *rlco + width * height / 2) >> return false; >> - memcpy(ref, *rlco, width * height); >> - *rlco += width * height / 2; >> + for (i = 0; i < height; i++) { >> + memcpy(dst, *rlco, width); >> + dst += dst_stride; >> + *rlco += width / 2; >> + } >> return true; >> } >> >> @@ -849,15 +859,18 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref, >> */ >> for (j = 0; j < height / 8; j++) { >> for (i = 0; i < width / 8; i++) { >> - u8 *refp = ref + j * 8 * coded_width + i * 8; >> + const u8 *refp = ref + j * 8 * ref_step * coded_width + >> + i * 8 * ref_step; >> + u8 *dstp = dst + j * 8 * dst_stride + i * 8 * dst_step; >> >> if (copies) { >> memcpy(cf->de_fwht, copy, sizeof(copy)); >> if (stat & PFRAME_BIT) >> add_deltas(cf->de_fwht, refp, >> - coded_width); >> - fill_decoder_block(refp, cf->de_fwht, >> - coded_width); >> + coded_width * ref_step, >> + ref_step); >> + fill_decoder_block(dstp, cf->de_fwht, >> + dst_stride, dst_step); >> copies--; >> continue; >> } >> @@ -877,23 +890,28 @@ static bool decode_plane(struct fwht_cframe *cf, const __be16 **rlco, u8 *ref, >> if (copies) >> memcpy(copy, cf->de_fwht, sizeof(copy)); >> if (stat & PFRAME_BIT) >> - add_deltas(cf->de_fwht, refp, coded_width); >> - fill_decoder_block(refp, cf->de_fwht, coded_width); >> + add_deltas(cf->de_fwht, refp, >> + coded_width * ref_step, ref_step); >> + fill_decoder_block(dstp, cf->de_fwht, dst_stride, >> + dst_step); >> } >> } >> return true; >> } >> >> -bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref, >> +bool fwht_decode_frame(struct fwht_cframe *cf, const struct fwht_raw_frame *ref, >> u32 hdr_flags, unsigned int components_num, >> unsigned int width, unsigned int height, >> - unsigned int coded_width) >> + unsigned int coded_width, struct fwht_raw_frame *dst, >> + unsigned int dst_stride, unsigned int dst_chroma_stride) >> { >> const __be16 *rlco = cf->rlc_data; >> const __be16 *end_of_rlco_buf = cf->rlc_data + >> (cf->size / sizeof(*rlco)) - 1; >> >> if (!decode_plane(cf, &rlco, ref->luma, height, width, coded_width, >> + dst->luma, dst_stride, dst->luma_alpha_step, >> + ref->luma_alpha_step, >> hdr_flags & FWHT_FL_LUMA_IS_UNCOMPRESSED, >> end_of_rlco_buf)) >> return false; >> @@ -909,11 +927,15 @@ bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref, >> w /= 2; >> c /= 2; >> } >> - if (!decode_plane(cf, &rlco, ref->cb, h, w, c, >> + if (!decode_plane(cf, &rlco, ref->cb, h, w, c, dst->cb, >> + dst_chroma_stride, dst->chroma_step, >> + ref->chroma_step, >> hdr_flags & FWHT_FL_CB_IS_UNCOMPRESSED, >> end_of_rlco_buf)) >> return false; >> - if (!decode_plane(cf, &rlco, ref->cr, h, w, c, >> + if (!decode_plane(cf, &rlco, ref->cr, h, w, c, dst->cr, >> + dst_chroma_stride, dst->chroma_step, >> + ref->chroma_step, >> hdr_flags & FWHT_FL_CR_IS_UNCOMPRESSED, >> end_of_rlco_buf)) >> return false; >> @@ -922,6 +944,8 @@ bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref, >> if (components_num == 4) >> if (!decode_plane(cf, &rlco, ref->alpha, height, width, >> coded_width, >> + dst->alpha, dst_stride, dst->luma_alpha_step, >> + ref->luma_alpha_step, >> hdr_flags & FWHT_FL_ALPHA_IS_UNCOMPRESSED, >> end_of_rlco_buf)) >> return false; >> diff --git a/drivers/media/platform/vicodec/codec-fwht.h b/drivers/media/platform/vicodec/codec-fwht.h >> index 8a4f07d466cb..eab4a97aa132 100644 >> --- a/drivers/media/platform/vicodec/codec-fwht.h >> +++ b/drivers/media/platform/vicodec/codec-fwht.h >> @@ -140,9 +140,9 @@ u32 fwht_encode_frame(struct fwht_raw_frame *frm, >> bool is_intra, bool next_is_intra, >> unsigned int width, unsigned int height, >> unsigned int stride, unsigned int chroma_stride); >> -bool fwht_decode_frame(struct fwht_cframe *cf, struct fwht_raw_frame *ref, >> - u32 hdr_flags, unsigned int components_num, >> - unsigned int width, unsigned int height, >> - unsigned int coded_width); >> - >> +bool fwht_decode_frame(struct fwht_cframe *cf, const struct fwht_raw_frame *ref, >> + u32 hdr_flags, unsigned int components_num, >> + unsigned int width, unsigned int height, >> + unsigned int coded_width, struct fwht_raw_frame *dst, >> + unsigned int dst_stride, unsigned int dst_chroma_stride); >> #endif >> diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.c b/drivers/media/platform/vicodec/codec-v4l2-fwht.c >> index 728ed5012aed..40b1f4901fd3 100644 >> --- a/drivers/media/platform/vicodec/codec-v4l2-fwht.c >> +++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.c >> @@ -75,6 +75,35 @@ const struct v4l2_fwht_pixfmt_info *v4l2_fwht_get_pixfmt(u32 idx) >> return v4l2_fwht_pixfmts + idx; >> } >> >> +void copy_cap_to_ref(u8 *cap, const struct v4l2_fwht_pixfmt_info *info, >> + struct v4l2_fwht_state *state) > > Should be a static function and cap should be const u8 *. Sorry, I misread. This function is used in vicodec-core, so it can't be static. But wouldn't it make more sense to move this function to vicodec-core.c as well? Regards, Hans > >> +{ >> + int plane_idx; >> + u8 *p_ref = state->ref_frame.buf; >> + >> + for (plane_idx = 0; plane_idx < info->planes_num; plane_idx++) { >> + int i; >> + bool is_chroma_plane = plane_idx == 1 || plane_idx == 2; >> + unsigned int h_div = is_chroma_plane ? info->height_div : 1; >> + unsigned int w_div = is_chroma_plane ? info->width_div : 1; >> + unsigned int step = is_chroma_plane ? info->chroma_step : >> + info->luma_alpha_step; >> + unsigned int stride_div = >> + (info->planes_num == 3 && plane_idx > 0) ? 2 : 1; >> + >> + u8 *row_dst = cap; > > I'd call this row_cap. 'dst' is confusing since you expect a memcpy to write > to the destination address, but this isn't the 'destination', it is just a > row in the capture buffer. > > It really confused me for a bit :-) > > Regards, > > Hans > >> + u8 *row_ref = p_ref; >> + >> + for (i = 0; i < state->visible_height / h_div; i++) { >> + memcpy(row_ref, row_dst, step * state->visible_width / w_div); >> + row_ref += step * state->coded_width / w_div; >> + row_dst += state->stride / stride_div; >> + } >> + cap += (state->stride / stride_div) * (state->coded_height / h_div); >> + p_ref += (step * state->coded_width / w_div) * (state->coded_height / h_div); >> + } >> +} >> + >> static int prepare_raw_frame(struct fwht_raw_frame *rf, >> const struct v4l2_fwht_pixfmt_info *info, u8 *buf, >> unsigned int size) >> @@ -243,14 +272,16 @@ int v4l2_fwht_encode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out) >> >> int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out) >> { >> - unsigned int i, j, k; >> u32 flags; >> struct fwht_cframe cf; >> - u8 *p, *ref_p; >> unsigned int components_num = 3; >> unsigned int version; >> const struct v4l2_fwht_pixfmt_info *info; >> unsigned int hdr_width_div, hdr_height_div; >> + struct fwht_raw_frame dst_rf; >> + unsigned int dst_chroma_stride = state->stride; >> + unsigned int dst_size = state->stride * state->coded_height; >> + unsigned int ref_size; >> >> if (!state->info) >> return -EINVAL; >> @@ -298,241 +329,24 @@ int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out) >> hdr_height_div != info->height_div) >> return -EINVAL; >> >> - if (!fwht_decode_frame(&cf, &state->ref_frame, flags, components_num, >> - state->visible_width, state->visible_height, >> - state->coded_width)) >> + if (prepare_raw_frame(&dst_rf, info, p_out, dst_size)) >> return -EINVAL; >> + if (info->id == V4L2_PIX_FMT_YUV420 || >> + info->id == V4L2_PIX_FMT_YVU420 || >> + info->id == V4L2_PIX_FMT_YUV422P) >> + dst_chroma_stride /= 2; >> >> - /* >> - * TODO - handle the case where the compressed stream encodes a >> - * different format than the requested decoded format. >> - */ >> - switch (state->info->id) { >> - case V4L2_PIX_FMT_GREY: >> - ref_p = state->ref_frame.luma; >> - for (i = 0; i < state->coded_height; i++) { >> - memcpy(p_out, ref_p, state->visible_width); >> - p_out += state->stride; >> - ref_p += state->coded_width; >> - } >> - break; >> - case V4L2_PIX_FMT_YUV420: >> - case V4L2_PIX_FMT_YUV422P: >> - ref_p = state->ref_frame.luma; >> - for (i = 0; i < state->coded_height; i++) { >> - memcpy(p_out, ref_p, state->visible_width); >> - p_out += state->stride; >> - ref_p += state->coded_width; >> - } >> - >> - ref_p = state->ref_frame.cb; >> - for (i = 0; i < state->coded_height / 2; i++) { >> - memcpy(p_out, ref_p, state->visible_width / 2); >> - p_out += state->stride / 2; >> - ref_p += state->coded_width / 2; >> - } >> - ref_p = state->ref_frame.cr; >> - for (i = 0; i < state->coded_height / 2; i++) { >> - memcpy(p_out, ref_p, state->visible_width / 2); >> - p_out += state->stride / 2; >> - ref_p += state->coded_width / 2; >> - } >> - break; >> - case V4L2_PIX_FMT_YVU420: >> - ref_p = state->ref_frame.luma; >> - for (i = 0; i < state->coded_height; i++) { >> - memcpy(p_out, ref_p, state->visible_width); >> - p_out += state->stride; >> - ref_p += state->coded_width; >> - } >> - >> - ref_p = state->ref_frame.cr; >> - for (i = 0; i < state->coded_height / 2; i++) { >> - memcpy(p_out, ref_p, state->visible_width / 2); >> - p_out += state->stride / 2; >> - ref_p += state->coded_width / 2; >> - } >> - ref_p = state->ref_frame.cb; >> - for (i = 0; i < state->coded_height / 2; i++) { >> - memcpy(p_out, ref_p, state->visible_width / 2); >> - p_out += state->stride / 2; >> - ref_p += state->coded_width / 2; >> - } >> - break; >> - case V4L2_PIX_FMT_NV12: >> - case V4L2_PIX_FMT_NV16: >> - case V4L2_PIX_FMT_NV24: >> - ref_p = state->ref_frame.luma; >> - for (i = 0; i < state->coded_height; i++) { >> - memcpy(p_out, ref_p, state->visible_width); >> - p_out += state->stride; >> - ref_p += state->coded_width; >> - } >> + ref_size = state->coded_width * state->coded_height * >> + info->luma_alpha_step; >> >> - k = 0; >> - for (i = 0; i < state->coded_height / 2; i++) { >> - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { >> - *p++ = state->ref_frame.cb[k]; >> - *p++ = state->ref_frame.cr[k]; >> - k++; >> - } >> - p_out += state->stride; >> - } >> - break; >> - case V4L2_PIX_FMT_NV21: >> - case V4L2_PIX_FMT_NV61: >> - case V4L2_PIX_FMT_NV42: >> - ref_p = state->ref_frame.luma; >> - for (i = 0; i < state->coded_height; i++) { >> - memcpy(p_out, ref_p, state->visible_width); >> - p_out += state->stride; >> - ref_p += state->coded_width; >> - } >> + if (prepare_raw_frame(&state->ref_frame, info, state->ref_frame.buf, >> + ref_size)) >> + return -EINVAL; >> >> - k = 0; >> - for (i = 0; i < state->coded_height / 2; i++) { >> - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { >> - *p++ = state->ref_frame.cr[k]; >> - *p++ = state->ref_frame.cb[k]; >> - k++; >> - } >> - p_out += state->stride; >> - } >> - break; >> - case V4L2_PIX_FMT_YUYV: >> - k = 0; >> - for (i = 0; i < state->coded_height; i++) { >> - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { >> - *p++ = state->ref_frame.luma[k]; >> - *p++ = state->ref_frame.cb[k / 2]; >> - *p++ = state->ref_frame.luma[k + 1]; >> - *p++ = state->ref_frame.cr[k / 2]; >> - k += 2; >> - } >> - p_out += state->stride; >> - } >> - break; >> - case V4L2_PIX_FMT_YVYU: >> - k = 0; >> - for (i = 0; i < state->coded_height; i++) { >> - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { >> - *p++ = state->ref_frame.luma[k]; >> - *p++ = state->ref_frame.cr[k / 2]; >> - *p++ = state->ref_frame.luma[k + 1]; >> - *p++ = state->ref_frame.cb[k / 2]; >> - k += 2; >> - } >> - p_out += state->stride; >> - } >> - break; >> - case V4L2_PIX_FMT_UYVY: >> - k = 0; >> - for (i = 0; i < state->coded_height; i++) { >> - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { >> - *p++ = state->ref_frame.cb[k / 2]; >> - *p++ = state->ref_frame.luma[k]; >> - *p++ = state->ref_frame.cr[k / 2]; >> - *p++ = state->ref_frame.luma[k + 1]; >> - k += 2; >> - } >> - p_out += state->stride; >> - } >> - break; >> - case V4L2_PIX_FMT_VYUY: >> - k = 0; >> - for (i = 0; i < state->coded_height; i++) { >> - for (j = 0, p = p_out; j < state->coded_width / 2; j++) { >> - *p++ = state->ref_frame.cr[k / 2]; >> - *p++ = state->ref_frame.luma[k]; >> - *p++ = state->ref_frame.cb[k / 2]; >> - *p++ = state->ref_frame.luma[k + 1]; >> - k += 2; >> - } >> - p_out += state->stride; >> - } >> - break; >> - case V4L2_PIX_FMT_RGB24: >> - case V4L2_PIX_FMT_HSV24: >> - k = 0; >> - for (i = 0; i < state->coded_height; i++) { >> - for (j = 0, p = p_out; j < state->coded_width; j++) { >> - *p++ = state->ref_frame.cr[k]; >> - *p++ = state->ref_frame.luma[k]; >> - *p++ = state->ref_frame.cb[k]; >> - k++; >> - } >> - p_out += state->stride; >> - } >> - break; >> - case V4L2_PIX_FMT_BGR24: >> - k = 0; >> - for (i = 0; i < state->coded_height; i++) { >> - for (j = 0, p = p_out; j < state->coded_width; j++) { >> - *p++ = state->ref_frame.cb[k]; >> - *p++ = state->ref_frame.luma[k]; >> - *p++ = state->ref_frame.cr[k]; >> - k++; >> - } >> - p_out += state->stride; >> - } >> - break; >> - case V4L2_PIX_FMT_RGB32: >> - case V4L2_PIX_FMT_XRGB32: >> - case V4L2_PIX_FMT_HSV32: >> - k = 0; >> - for (i = 0; i < state->coded_height; i++) { >> - for (j = 0, p = p_out; j < state->coded_width; j++) { >> - *p++ = 0; >> - *p++ = state->ref_frame.cr[k]; >> - *p++ = state->ref_frame.luma[k]; >> - *p++ = state->ref_frame.cb[k]; >> - k++; >> - } >> - p_out += state->stride; >> - } >> - break; >> - case V4L2_PIX_FMT_BGR32: >> - case V4L2_PIX_FMT_XBGR32: >> - k = 0; >> - for (i = 0; i < state->coded_height; i++) { >> - for (j = 0, p = p_out; j < state->coded_width; j++) { >> - *p++ = state->ref_frame.cb[k]; >> - *p++ = state->ref_frame.luma[k]; >> - *p++ = state->ref_frame.cr[k]; >> - *p++ = 0; >> - k++; >> - } >> - p_out += state->stride; >> - } >> - break; >> - case V4L2_PIX_FMT_ARGB32: >> - k = 0; >> - for (i = 0; i < state->coded_height; i++) { >> - for (j = 0, p = p_out; j < state->coded_width; j++) { >> - *p++ = state->ref_frame.alpha[k]; >> - *p++ = state->ref_frame.cr[k]; >> - *p++ = state->ref_frame.luma[k]; >> - *p++ = state->ref_frame.cb[k]; >> - k++; >> - } >> - p_out += state->stride; >> - } >> - break; >> - case V4L2_PIX_FMT_ABGR32: >> - k = 0; >> - for (i = 0; i < state->coded_height; i++) { >> - for (j = 0, p = p_out; j < state->coded_width; j++) { >> - *p++ = state->ref_frame.cb[k]; >> - *p++ = state->ref_frame.luma[k]; >> - *p++ = state->ref_frame.cr[k]; >> - *p++ = state->ref_frame.alpha[k]; >> - k++; >> - } >> - p_out += state->stride; >> - } >> - break; >> - default: >> + if (!fwht_decode_frame(&cf, &state->ref_frame, flags, components_num, >> + state->visible_width, state->visible_height, >> + state->coded_width, &dst_rf, state->stride, >> + dst_chroma_stride)) >> return -EINVAL; >> - } >> return 0; >> } >> diff --git a/drivers/media/platform/vicodec/codec-v4l2-fwht.h b/drivers/media/platform/vicodec/codec-v4l2-fwht.h >> index aa6fa90a48be..75343cdf45e2 100644 >> --- a/drivers/media/platform/vicodec/codec-v4l2-fwht.h >> +++ b/drivers/media/platform/vicodec/codec-v4l2-fwht.h >> @@ -53,6 +53,9 @@ const struct v4l2_fwht_pixfmt_info *v4l2_fwht_default_fmt(u32 width_div, >> u32 pixenc, >> unsigned int start_idx); >> >> +void copy_cap_to_ref(u8 *cap, const struct v4l2_fwht_pixfmt_info *info, >> + struct v4l2_fwht_state *state); >> + >> int v4l2_fwht_encode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out); >> int v4l2_fwht_decode(struct v4l2_fwht_state *state, u8 *p_in, u8 *p_out); >> >> diff --git a/drivers/media/platform/vicodec/vicodec-core.c b/drivers/media/platform/vicodec/vicodec-core.c >> index 8d38bc1ef079..335a931fdf02 100644 >> --- a/drivers/media/platform/vicodec/vicodec-core.c >> +++ b/drivers/media/platform/vicodec/vicodec-core.c >> @@ -194,6 +194,8 @@ static int device_process(struct vicodec_ctx *ctx, >> ret = v4l2_fwht_decode(state, p_src, p_dst); >> if (ret < 0) >> return ret; >> + copy_cap_to_ref(p_dst, ctx->state.info, &ctx->state); >> + >> vb2_set_plane_payload(&dst_vb->vb2_buf, 0, q_dst->sizeimage); >> } >> >> >