Re: [v2 PATCH] RISC-V: Optimize tlb flush path.

From: Anup Patel
Date: Tue Aug 20 2019 - 04:51:50 EST


On Tue, Aug 20, 2019 at 6:17 AM Atish Patra <atish.patra@xxxxxxx> wrote:
>
> In RISC-V, tlb flush happens via SBI which is expensive.
> If the target cpumask contains a local hartid, some cost
> can be saved by issuing a local tlb flush as we do that
> in OpenSBI anyways. There is also no need of SBI call if
> cpumask is empty.
>
> Do a local flush first if current cpu is present in cpumask.
> Invoke SBI call only if target cpumask contains any cpus
> other than local cpu.
>
> Signed-off-by: Atish Patra <atish.patra@xxxxxxx>
> ---
> arch/riscv/include/asm/tlbflush.h | 37 ++++++++++++++++++++++++++-----
> 1 file changed, 31 insertions(+), 6 deletions(-)
>
> diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
> index b5e64dc19b9e..3f9cd17b5402 100644
> --- a/arch/riscv/include/asm/tlbflush.h
> +++ b/arch/riscv/include/asm/tlbflush.h
> @@ -8,6 +8,7 @@
> #define _ASM_RISCV_TLBFLUSH_H
>
> #include <linux/mm_types.h>
> +#include <linux/sched.h>
> #include <asm/smp.h>
>
> /*
> @@ -42,20 +43,44 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
>
> #include <asm/sbi.h>
>
> -static inline void remote_sfence_vma(struct cpumask *cmask, unsigned long start,
> - unsigned long size)
> +static void __riscv_flush_tlb(struct cpumask *cmask, unsigned long start,
> + unsigned long size)
> {
> struct cpumask hmask;
> + unsigned int hartid;
> + unsigned int cpuid;
>
> cpumask_clear(&hmask);
> +
> + if (!cmask) {
> + riscv_cpuid_to_hartid_mask(cpu_online_mask, &hmask);
> + goto issue_sfence;
> + }
> +
> + cpuid = get_cpu();
> + if (cpumask_test_cpu(cpuid, cmask)) {
> + /* Save trap cost by issuing a local tlb flush here */
> + if ((start == 0 && size == -1) || (size > PAGE_SIZE))
> + local_flush_tlb_all();
> + else if (size == PAGE_SIZE)
> + local_flush_tlb_page(start);
> + }
> + if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids)
> + goto done;
> +
> riscv_cpuid_to_hartid_mask(cmask, &hmask);
> + hartid = cpuid_to_hartid_map(cpuid);
> + cpumask_clear_cpu(hartid, &hmask);
> +
> +issue_sfence:
> sbi_remote_sfence_vma(hmask.bits, start, size);
> +done:
> + put_cpu();
> }
>
> -#define flush_tlb_all() sbi_remote_sfence_vma(NULL, 0, -1)
> -
> +#define flush_tlb_all() __riscv_flush_tlb(NULL, 0, -1)
> #define flush_tlb_range(vma, start, end) \
> - remote_sfence_vma(mm_cpumask((vma)->vm_mm), start, (end) - (start))
> + __riscv_flush_tlb(mm_cpumask((vma)->vm_mm), start, (end) - (start))
>
> static inline void flush_tlb_page(struct vm_area_struct *vma,
> unsigned long addr) {
> @@ -63,7 +88,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
> }
>
> #define flush_tlb_mm(mm) \
> - remote_sfence_vma(mm_cpumask(mm), 0, -1)
> + __riscv_flush_tlb(mm_cpumask(mm), 0, -1)
>
> #endif /* CONFIG_SMP */
>
> --
> 2.21.0
>

I think we should move __riscv_flush_tlb() to mm/tlbflush.c because it is
now quite big for a function defined in a header.

In the future, we will also have __riscv_flush_tlb_asid(), which will flush
the TLB based on ASID.

Regards,
Anup