Re: [PATCH v3 1/2] drm/i915/gt: Serialize GRDOM access between multiple engine resets

From: Andi Shyti
Date: Wed Jul 06 2022 - 06:54:22 EST


Hi Mauro and Chris,

On Mon, Jul 04, 2022 at 09:09:28AM +0100, Mauro Carvalho Chehab wrote:
> From: Chris Wilson <chris.p.wilson@xxxxxxxxx>
>
> Don't allow two engines to be reset in parallel, as they would both
> try to select a reset bit (and send requests to common registers)
> and wait on that register, at the same time. Serialize control of
> the reset requests/acks using the uncore->lock, which will also ensure
> that no other GT state changes at the same time as the actual reset.
>
> Cc: stable@xxxxxxxxxxxxxxx # Up to 4.4
> Reported-by: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
> Cc: Mika Kuoppala <mika.kuoppala@xxxxxxxxxxxxxxx>
> Reviewed-by: Andi Shyti <andi.shyti@xxxxxxxxx>
> Acked-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx>
> Signed-off-by: Mauro Carvalho Chehab <mchehab@xxxxxxxxxx>

Sorry for the delay, but I wanted to understand what had been
agreed between you and Tvrtko about Cc'ing the stable list.

Anyway, I confirm my review here.

Andi

> ---
>
> To avoid mailbombing a large number of people, only mailing lists were Cc'd on the cover letter.
> See [PATCH v3 0/2] at: https://lore.kernel.org/all/cover.1656921701.git.mchehab@xxxxxxxxxx/
>
> drivers/gpu/drm/i915/gt/intel_reset.c | 37 ++++++++++++++++++++-------
> 1 file changed, 28 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index a5338c3fde7a..c68d36fb5bbd 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -300,9 +300,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
> return err;
> }
>
> -static int gen6_reset_engines(struct intel_gt *gt,
> - intel_engine_mask_t engine_mask,
> - unsigned int retry)
> +static int __gen6_reset_engines(struct intel_gt *gt,
> + intel_engine_mask_t engine_mask,
> + unsigned int retry)
> {
> struct intel_engine_cs *engine;
> u32 hw_mask;
> @@ -321,6 +321,20 @@ static int gen6_reset_engines(struct intel_gt *gt,
> return gen6_hw_domain_reset(gt, hw_mask);
> }
>
> +static int gen6_reset_engines(struct intel_gt *gt,
> + intel_engine_mask_t engine_mask,
> + unsigned int retry)
> +{
> + unsigned long flags;
> + int ret;
> +
> + spin_lock_irqsave(&gt->uncore->lock, flags);
> + ret = __gen6_reset_engines(gt, engine_mask, retry);
> + spin_unlock_irqrestore(&gt->uncore->lock, flags);
> +
> + return ret;
> +}
> +
> static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
> {
> int vecs_id;
> @@ -487,9 +501,9 @@ static void gen11_unlock_sfc(struct intel_engine_cs *engine)
> rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
> }
>
> -static int gen11_reset_engines(struct intel_gt *gt,
> - intel_engine_mask_t engine_mask,
> - unsigned int retry)
> +static int __gen11_reset_engines(struct intel_gt *gt,
> + intel_engine_mask_t engine_mask,
> + unsigned int retry)
> {
> struct intel_engine_cs *engine;
> intel_engine_mask_t tmp;
> @@ -583,8 +597,11 @@ static int gen8_reset_engines(struct intel_gt *gt,
> struct intel_engine_cs *engine;
> const bool reset_non_ready = retry >= 1;
> intel_engine_mask_t tmp;
> + unsigned long flags;
> int ret;
>
> + spin_lock_irqsave(&gt->uncore->lock, flags);
> +
> for_each_engine_masked(engine, gt, engine_mask, tmp) {
> ret = gen8_engine_reset_prepare(engine);
> if (ret && !reset_non_ready)
> @@ -612,17 +629,19 @@ static int gen8_reset_engines(struct intel_gt *gt,
> * This is best effort, so ignore any error from the initial reset.
> */
> if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
> - gen11_reset_engines(gt, gt->info.engine_mask, 0);
> + __gen11_reset_engines(gt, gt->info.engine_mask, 0);
>
> if (GRAPHICS_VER(gt->i915) >= 11)
> - ret = gen11_reset_engines(gt, engine_mask, retry);
> + ret = __gen11_reset_engines(gt, engine_mask, retry);
> else
> - ret = gen6_reset_engines(gt, engine_mask, retry);
> + ret = __gen6_reset_engines(gt, engine_mask, retry);
>
> skip_reset:
> for_each_engine_masked(engine, gt, engine_mask, tmp)
> gen8_engine_reset_cancel(engine);
>
> + spin_unlock_irqrestore(&gt->uncore->lock, flags);
> +
> return ret;
> }
>
> --
> 2.36.1