Re: [PATCH 08/25] vtime: Exit vtime before exit_notify()

From: Peter Zijlstra
Date: Tue Nov 20 2018 - 08:55:20 EST


On Wed, Nov 14, 2018 at 03:45:52AM +0100, Frederic Weisbecker wrote:
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index d458d65..27e0544 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -265,6 +265,8 @@ struct task_cputime {
> enum vtime_state {
> /* Task is sleeping or running in a CPU with VTIME inactive: */
> VTIME_INACTIVE = 0,
> + /* Task has passed exit_notify() */
> + VTIME_DEAD,

How does it make sense for VTIME_DEAD > VTIME_INACTIVE?

> /* Task is idle */
> VTIME_IDLE,
> /* Task runs in kernelspace in a CPU with VTIME active: */


> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index f64afd7..a0c3a82 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -813,17 +813,31 @@ void vtime_task_switch_generic(struct task_struct *prev)
> {
> struct vtime *vtime = &prev->vtime;
>
> - write_seqcount_begin(&vtime->seqcount);
> - if (vtime->state == VTIME_IDLE)
> - vtime_account_idle(prev);
> - else
> - __vtime_account_kernel(prev, vtime);
> - vtime->state = VTIME_INACTIVE;
> - vtime->cpu = -1;
> - write_seqcount_end(&vtime->seqcount);
> + /*
> + * Flush the prev task vtime, unless it has passed
> + * vtime_exit_task(), in which case there is nothing
> + * left to account.
> + */
> + if (vtime->state != VTIME_DEAD) {
> + write_seqcount_begin(&vtime->seqcount);
> + if (vtime->state == VTIME_IDLE)
> + vtime_account_idle(prev);
> + else
> + __vtime_account_kernel(prev, vtime);
> + vtime->state = VTIME_INACTIVE;
> + vtime->cpu = -1;
> + write_seqcount_end(&vtime->seqcount);
> + }
>
> vtime = &current->vtime;
>
> + /*
> + * Ignore the next task if it has been preempted after
> + * vtime_exit_task().
> + */
> + if (vtime->state == VTIME_DEAD)
> + return;
> +
> write_seqcount_begin(&vtime->seqcount);
> if (is_idle_task(current))
> vtime->state = VTIME_IDLE;

Bit inconsistent; having the one as an indent and the other as an early
return.

> @@ -850,6 +864,30 @@ void vtime_init_idle(struct task_struct *t, int cpu)
> local_irq_restore(flags);
> }
>
> +/*
> + * This is the final settlement point after which we don't account
> + * anymore vtime for this task.
> + */
> +void vtime_exit_task(struct task_struct *t)
> +{
> + struct vtime *vtime = &t->vtime;
> + unsigned long flags;

Note that the code in vtime_task_switch_generic() (above) relies on @t
== current (which is true, but not explicit).

> + local_irq_save(flags);
> + write_seqcount_begin(&vtime->seqcount);
> + /*
> + * A task that has never run on a nohz_full CPU hasn't
> + * been tracked by vtime. Thus it's in VTIME_INACTIVE
> + * state. Nothing to account for it.
> + */
> + if (vtime->state != VTIME_INACTIVE)
> + vtime_account_system(t, vtime);
> + vtime->state = VTIME_DEAD;
> + vtime->cpu = -1;
> + write_seqcount_end(&vtime->seqcount);
> + local_irq_restore(flags);
> +}
> +
> u64 task_gtime(struct task_struct *t)
> {
> struct vtime *vtime = &t->vtime;
> --
> 2.7.4
>