Re: [PATCH 1/2] mm: Add kernel MMU notifier to manage IOTLB/DEVTLB

From: Bob Liu
Date: Wed Dec 13 2017 - 22:12:14 EST


On 2017/12/14 9:02, Lu Baolu wrote:
> From: Huang Ying <ying.huang@xxxxxxxxx>
>
> Shared Virtual Memory (SVM) allows a kernel memory mapping to be
> shared between CPU and a device which requested a supervisor
> PASID. Both devices and IOMMU units have TLBs that cache entries
> from CPU's page tables. We need to get a chance to flush them at
> the same time when we flush the CPU TLBs.
>
> We already have existing MMU notifiers for userspace updates,
> however we lack the same thing for kernel page table updates. To

Sorry, I didn't get which situation needs this notification.
Could you please describe the full scenario?

Thanks,
Liubo

> implement the MMU notification mechanism for the kernel address
> space, a kernel MMU notifier chain is defined and will be called
> whenever the CPU TLB is flushed for the kernel address space.
>
> As consumer of this notifier, the IOMMU SVM implementations will
> register callbacks on this notifier and manage the cache entries
> in both IOTLB and DevTLB.
>
> Cc: Ashok Raj <ashok.raj@xxxxxxxxx>
> Cc: Dave Hansen <dave.hansen@xxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
> Cc: Andy Lutomirski <luto@xxxxxxxxxx>
> Cc: Rik van Riel <riel@xxxxxxxxxx>
> Cc: Kees Cook <keescook@xxxxxxxxxxxx>
> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
> Cc: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Cc: Matthew Wilcox <willy@xxxxxxxxxxxxxxx>
> Cc: Dave Jiang <dave.jiang@xxxxxxxxx>
> Cc: Michal Hocko <mhocko@xxxxxxxx>
> Cc: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
> Cc: Vegard Nossum <vegard.nossum@xxxxxxxxxx>
> Cc: x86@xxxxxxxxxx
> Cc: linux-mm@xxxxxxxxx
>
> Tested-by: CQ Tang <cq.tang@xxxxxxxxx>
> Signed-off-by: Huang Ying <ying.huang@xxxxxxxxx>
> Signed-off-by: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
> ---
> arch/x86/mm/tlb.c | 2 ++
> include/linux/mmu_notifier.h | 33 +++++++++++++++++++++++++++++++++
> mm/mmu_notifier.c | 27 +++++++++++++++++++++++++++
> 3 files changed, 62 insertions(+)
>
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index 3118392cd..5ff104f 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -6,6 +6,7 @@
> #include <linux/interrupt.h>
> #include <linux/export.h>
> #include <linux/cpu.h>
> +#include <linux/mmu_notifier.h>
>
> #include <asm/tlbflush.h>
> #include <asm/mmu_context.h>
> @@ -567,6 +568,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> info.end = end;
> on_each_cpu(do_kernel_range_flush, &info, 1);
> }
> + kernel_mmu_notifier_invalidate_range(start, end);
> }
>
> void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
> diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
> index b25dc9d..44d7c06 100644
> --- a/include/linux/mmu_notifier.h
> +++ b/include/linux/mmu_notifier.h
> @@ -408,6 +408,25 @@ extern void mmu_notifier_call_srcu(struct rcu_head *rcu,
> void (*func)(struct rcu_head *rcu));
> extern void mmu_notifier_synchronize(void);
>
> +struct kernel_mmu_address_range {
> + unsigned long start;
> + unsigned long end;
> +};
> +
> +/*
> + * Before the virtual address range managed by kernel (vmalloc/kmap)
> + * is reused, that is, remapped to the new physical addresses, the
> + * kernel MMU notifier will be called with KERNEL_MMU_INVALIDATE_RANGE
> + * and struct kernel_mmu_address_range as parameters. This is used to
> + * manage the remote TLB.
> + */
> +#define KERNEL_MMU_INVALIDATE_RANGE 1
> +extern int kernel_mmu_notifier_register(struct notifier_block *nb);
> +extern int kernel_mmu_notifier_unregister(struct notifier_block *nb);
> +
> +extern int kernel_mmu_notifier_invalidate_range(unsigned long start,
> + unsigned long end);
> +
> #else /* CONFIG_MMU_NOTIFIER */
>
> static inline int mm_has_notifiers(struct mm_struct *mm)
> @@ -474,6 +493,20 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
> #define pudp_huge_clear_flush_notify pudp_huge_clear_flush
> #define set_pte_at_notify set_pte_at
>
> +static inline int kernel_mmu_notifier_register(struct notifier_block *nb)
> +{
> + return 0;
> +}
> +
> +static inline int kernel_mmu_notifier_unregister(struct notifier_block *nb)
> +{
> + return 0;
> +}
> +
> +static inline void kernel_mmu_notifier_invalidate_range(unsigned long start,
> + unsigned long end)
> +{
> +}
> #endif /* CONFIG_MMU_NOTIFIER */
>
> #endif /* _LINUX_MMU_NOTIFIER_H */
> diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
> index 96edb33..52f816a 100644
> --- a/mm/mmu_notifier.c
> +++ b/mm/mmu_notifier.c
> @@ -393,3 +393,30 @@ void mmu_notifier_unregister_no_release(struct mmu_notifier *mn,
> mmdrop(mm);
> }
> EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release);
> +
> +static ATOMIC_NOTIFIER_HEAD(kernel_mmu_notifier_list);
> +
> +int kernel_mmu_notifier_register(struct notifier_block *nb)
> +{
> + return atomic_notifier_chain_register(&kernel_mmu_notifier_list, nb);
> +}
> +EXPORT_SYMBOL_GPL(kernel_mmu_notifier_register);
> +
> +int kernel_mmu_notifier_unregister(struct notifier_block *nb)
> +{
> + return atomic_notifier_chain_unregister(&kernel_mmu_notifier_list, nb);
> +}
> +EXPORT_SYMBOL_GPL(kernel_mmu_notifier_unregister);
> +
> +int kernel_mmu_notifier_invalidate_range(unsigned long start,
> + unsigned long end)
> +{
> + struct kernel_mmu_address_range range = {
> + .start = start,
> + .end = end,
> + };
> +
> + return atomic_notifier_call_chain(&kernel_mmu_notifier_list,
> + KERNEL_MMU_INVALIDATE_RANGE,
> + &range);
> +}
>