Re: [PATCH v5] media: mediatek: vcodec: Add to support VP9 inner racing mode

From: Hans Verkuil
Date: Fri Nov 04 2022 - 06:19:38 EST


Hi Mingjia,

What are the changes since v4? You didn't mention that.

Yunfei, can you review this v5? I prefer to have your Acked/Reviewed-by before merging.

Regards,

Hans

On 25/10/2022 03:46, Mingjia Zhang wrote:
> Enable VP9 inner racing mode
> We send the lat trans buffer to the core when triggering the lat to work, instead of waiting for the lat decode to be done.
> This can reduce decoder latency.
>
> Signed-off-by: Mingjia Zhang <mingjia.zhang@xxxxxxxxxxxx>
> ---
> Changes from v3:
>
> - CTS/GTS test pass
> - Fluster result: Ran 275/303 tests successfully
>
> Changes from v2:
>
> - CTS/GTS test pass
> - Fluster result: Ran 240/303 tests successfully
>
> Changes from v1:
>
> - CTS/GTS test pass
> ---
> .../vcodec/vdec/vdec_vp9_req_lat_if.c | 85 ++++++++++---------
> 1 file changed, 47 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> index 81de876d51267..1b39119c89951 100644
> --- a/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> +++ b/drivers/media/platform/mediatek/vcodec/vdec/vdec_vp9_req_lat_if.c
> @@ -436,6 +436,7 @@ struct vdec_vp9_slice_ref {
> * @frame_ctx: 4 frame context according to VP9 Spec
> * @frame_ctx_helper: 4 frame context according to newest kernel spec
> * @dirty: state of each frame context
> + * @local_vsi: local instance vsi information
> * @init_vsi: vsi used for initialized VP9 instance
> * @vsi: vsi used for decoding/flush ...
> * @core_vsi: vsi used for Core stage
> @@ -482,6 +483,8 @@ struct vdec_vp9_slice_instance {
> struct v4l2_vp9_frame_context frame_ctx_helper;
> unsigned char dirty[4];
>
> + struct vdec_vp9_slice_vsi local_vsi;
> +
> /* MicroP vsi */
> union {
> struct vdec_vp9_slice_init_vsi *init_vsi;
> @@ -1616,16 +1619,10 @@ static int vdec_vp9_slice_update_single(struct vdec_vp9_slice_instance *instance
> }
>
> static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance,
> - struct vdec_lat_buf *lat_buf,
> - struct vdec_vp9_slice_pfc *pfc)
> + struct vdec_vp9_slice_vsi *vsi)
> {
> - struct vdec_vp9_slice_vsi *vsi;
> -
> - vsi = &pfc->vsi;
> - memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
> -
> mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x %lx %lx\n",
> - pfc->seq, vsi->state.crc[0],
> + (instance->seq - 1), vsi->state.crc[0],
> (unsigned long)vsi->trans.dma_addr,
> (unsigned long)vsi->trans.dma_addr_end);
>
> @@ -2090,6 +2087,13 @@ static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
> return ret;
> }
>
> + if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability)) {
> + vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
> + memcpy(&instance->local_vsi, vsi, sizeof(*vsi));
> + vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
> + vsi = &instance->local_vsi;
> + }
> +
> if (instance->irq) {
> ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
> WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0);
> @@ -2102,22 +2106,25 @@ static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
> }
>
> vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
> - ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
> + ret = vdec_vp9_slice_update_lat(instance, vsi);
>
> - /* LAT trans full, no more UBE or decode timeout */
> - if (ret) {
> - mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret);
> - return ret;
> - }
> + if (!IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
> + /* LAT trans full, no more UBE or decode timeout */
> + if (ret) {
> + mtk_vcodec_err(instance, "frame[%d] decode error: %d\n",
> + ret, (instance->seq - 1));
> + return ret;
> + }
>
> - mtk_vcodec_debug(instance, "lat dma addr: 0x%lx 0x%lx\n",
> - (unsigned long)pfc->vsi.trans.dma_addr,
> - (unsigned long)pfc->vsi.trans.dma_addr_end);
>
> - vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
> - vsi->trans.dma_addr_end +
> - ctx->msg_queue.wdma_addr.dma_addr);
> - vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
> + vsi->trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
> + vdec_msg_queue_update_ube_wptr(&ctx->msg_queue, vsi->trans.dma_addr_end);
> + if (!IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
> + vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
> +
> + mtk_vcodec_debug(instance, "lat trans end addr(0x%lx), ube start addr(0x%lx)\n",
> + (unsigned long)vsi->trans.dma_addr_end,
> + (unsigned long)ctx->msg_queue.wdma_addr.dma_addr);
>
> return 0;
> }
> @@ -2139,40 +2146,40 @@ static int vdec_vp9_slice_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
> static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
> {
> struct vdec_vp9_slice_instance *instance;
> - struct vdec_vp9_slice_pfc *pfc;
> + struct vdec_vp9_slice_pfc *pfc = NULL;
> struct mtk_vcodec_ctx *ctx = NULL;
> struct vdec_fb *fb = NULL;
> int ret = -EINVAL;
>
> if (!lat_buf)
> - goto err;
> + return -EINVAL;
>
> pfc = lat_buf->private_data;
> ctx = lat_buf->ctx;
> if (!pfc || !ctx)
> - goto err;
> + return -EINVAL;
>
> instance = ctx->drv_handle;
> if (!instance)
> - goto err;
> + return -EINVAL;
>
> fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
> if (!fb) {
> ret = -EBUSY;
> - goto err;
> + goto vdec_dec_end;
> }
>
> ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc);
> if (ret) {
> mtk_vcodec_err(instance, "vdec_vp9_slice_setup_core\n");
> - goto err;
> + goto vdec_dec_end;
> }
> vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
>
> ret = vpu_dec_core(&instance->vpu);
> if (ret) {
> mtk_vcodec_err(instance, "vpu_dec_core\n");
> - goto err;
> + goto vdec_dec_end;
> }
>
> if (instance->irq) {
> @@ -2190,24 +2197,26 @@ static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
> ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc);
> if (ret) {
> mtk_vcodec_err(instance, "vdec_vp9_slice_update_core\n");
> - goto err;
> + goto vdec_dec_end;
> }
>
> - pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
> mtk_vcodec_debug(instance, "core dma_addr_end 0x%lx\n",
> (unsigned long)pfc->vsi.trans.dma_addr_end);
> - vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
> - ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
> -
> - return 0;
>
> -err:
> - if (ctx && pfc) {
> - /* always update read pointer */
> - vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
> +vdec_dec_end:
> + /* always update read pointer */
> + if (IS_VDEC_INNER_RACING(ctx->dev->dec_capability))
> + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
> + pfc->vsi.trans.dma_addr);
> + else
> + vdec_msg_queue_update_ube_rptr(&ctx->msg_queue,
> + pfc->vsi.trans.dma_addr_end);
>
> + if (ret) {
> if (fb)
> ctx->dev->vdec_pdata->cap_to_disp(ctx, 1, lat_buf->src_buf_req);
> + } else {
> + ctx->dev->vdec_pdata->cap_to_disp(ctx, 0, lat_buf->src_buf_req);
> }
> return ret;
> }